# Define the functiondat <-log2_transform_data(data = data.db[[which.sample]],id_column ="gene_name")
Applying log2-transformation...Done.
(SUBSET) Filter data
Show code
# Default: min_expression and min_timepointsmin_expression <- dat |>select(-gene_name) |>summarize(across(everything(), ~mean(.x, na.rm =TRUE))) |>rowMeans() |>ceiling()min_timepoints <-ceiling(ncol(dat)*(2/3))
Show code
# Which genes are expressed throughout the day in forager heads?# count the number of time points that has ≥ 1 FPKM# subset the data and only keep the filtered genesdat_sub <- dat |>subset_data(min_expression = min_expression,min_timepoints = min_timepoints,id_column ="gene_name" )
Subsetting data...Done.
[ NOTE ]: After subsetting, 5568 of 12451 rows remain.
Show code
writeLines("Dimensions of the data post-filtering step [Rows = #genes, Cols = #samples]")
Dimensions of the data post-filtering step [Rows = #genes, Cols = #samples]
# Construct adjacency matrix# TO DO: make function ----adj_matrix <- WGCNA::adjacency.fromSimilarity( sim_matrix,power = soft.power,type ="signed") |>as.matrix()cat("After power transformation:")
After power transformation:
Show code
plot_sim_matrix(matrix = adj_matrix)
Plotting a chunk of the gene-gene similarity matrix with 200 genes.
Identify gene clusters
The following steps are performed as per guidelines from the WGCNA package and several tutorials made available online.
2.1 Create topological overalp matrix
Show code
# Turn adjacency into topological overlapdissTOM =1- WGCNA::TOMsimilarity(adj_matrix)
Calculating module-module similarity based
on module-eigengene-expression...Done.
Tidying module names...Done.
Plotting adjacency matrix for module-module similarity...
Visualizing a simplified representation of the circadian GCN
Save module identity
Obtain a list of genes in each module
Show code
module_genes <-tidy_modules(merged_modules = merge[["colors"]],mapping_tbl = adj_matrix_ME$mapping_tbl,data = datExpr)# saveRDS(# module_genes,# here::here(# glue::glue(# "./data/tmp/{which.sample}_module_genes.RDS"# )# )# )# TO DO:# Save the `dat_module_gene` as a database table in the respective database.
writeLines("#####################################################How many genes are in each of my geneset of interest?#####################################################")
#####################################################
How many genes are in each of my geneset of interest?
#####################################################
Show code
## MAKE YOUR LIST OF GENES OF INTEREST ### LIST ONE - WGCNA moduleslist1 <- l_module_geneswriteLines("List of interesting genes #1----------------------------Genes in each of the identified gene-clusters or modules")
List of interesting genes #1
----------------------------
Genes in each of the identified gene-clusters or modules
Show code
sapply(list1, length)
C1 C2 C3 C4 C5
1250 654 503 636 118
Show code
## LIST TWO - rhythmic geneslist2 <- l_rhy_geneswriteLines("List of interesting genes #2----------------------------Rhythmic genes identified by different algorithms")
List of interesting genes #2
----------------------------
Rhythmic genes identified by different algorithms
## CHECK FOR OVERLAP# define size of genomesize =length(unique(c(unlist(list1), unlist(list2))))# make a GOM objectgom.1v2 <- GeneOverlap::newGOM( list2, list1,genome.size = size)# png(paste0(path_to_repo, "/results/figures/",# "02_pogo_GCN/",# sample.name[1],"_gom_1v2.png"),# width = 35, height = 15, units = "cm", res = 300)GeneOverlap::drawHeatmap( gom.1v2,adj.p =TRUE,cutoff=0.05,what="odds.ratio",# what="Jaccard",log.scale = T,note.col ="black",grid.col ="Oranges")
Show code
# trash <- dev.off()# writeLines("How many genes exactly are overlapping between the pairwise comparisons")# getMatrix(gom.1v4, name = "intersection") %>% t()writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")
Visualizing the significant overlaps between your lists of interesting genes and the identified modules
“We begin by calculating the intramodular connectivity for each gene. (In network literature, connectivity is often referred to as”degree”.) The function intramodularConnectivity computes:
the whole network connectivity kTotal,
the within (intra)module connectivity kWithin,
the extra-modular connectivity kOut=kTotal-kWithin, and
the difference of the intra- and extra-modular connectivities kDiff = kIn - kOut = 2*kIN-kTotal
Show code
# From what I can tell, colorh1 in the tutorial refers to moduleColorscolorh1 <- merge$colors# Calculate the connectivities of the genesAlldegrees1 = WGCNA::intramodularConnectivity(adjMat = adj_matrix, colors = colorh1) |> tibble::rownames_to_column("gene_name") |>mutate(across(matches("^k"),~round(.x, 2) ) )
Plotting the mean (± 95% CI) connectivity of genes in different modules