Last updated on 2025-10-30.
Show code
library (dplyr)
library (dbplyr)
library (ggplot2)
for (i in list.files (here:: here ("R" ), full.names = TRUE )) {
source (i)
}
# SAMPLE NAME
## specify sample name
sample.names <- c (
# # dmel
# "dmel-head",
# "dmel-body",
# "dmel-heart"
# # mmus
# "mmus-brain_stem",
# "mmus-cerebellum",
# "mmus-hypothalamus",
# "mmus-heart"
# # panu
# "panu-brain_stem",
# "panu-cerebellum",
# "panu-hypothalamus",
# "panu-heart",
# "panu-scn"
# agam
"agam-head" ,
"agam-body"
)
# sample.cycles <- c("LD", "DD")
## SPECIFY THE DATASET TO BUILD GCN WITH
ref.sample <- "agam-head"
writeLines (
glue:: glue ("Reference tissue: {ref.sample}" )
)
Reference tissue: agam-head
Prep data
Expected format: one row per gene, one col per sample
Structure of reference tissue data:
ONE-SHOT (make_modules)
Make modules in one call
Show code
mods <- timecourseRnaseq:: make_modules (
data = tidydata.db[[ref.sample]],
log2 = TRUE ,
id_column = "gene_name" ,
min_expression = NULL , # automatically estimated
min_timepoints = NULL , # automatically estimated
method = "wgcna" ,
qc = TRUE ,
sim_method = "kendall" ,
soft_power = NULL , # automatically estimated
min_module_size = 50 ,
max_modules = 16 ,
merge_cutoff_similarity = 0.9 ,
plot_network = FALSE ,
# plot_network_min_edge = 0.5, # used when `plot_network == TRUE`
tidy_modules = TRUE
)
---------------------------------------------------
1. Log2-transform and subset
---------------------------------------------------
Applying log2-transformation...Done.
Estimated min_expression = 3
Estimated min_timepoints = 9
Subsetting data...Done.
[ NOTE ]: After subsetting, 3819 of 11269 rows remain.
Flagging genes and samples with too many missing values...
..step 1
All okay!Visualizing the log-transformed data
---------------------------------------------------
2. Calculate similarity of expression
---------------------------------------------------
Running gene-gene similarity...Done.
---------------------------------------------------
3. Create adjacency matrix
---------------------------------------------------
Performing network topology analysis to pick
soft-thresholding power...
Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
1 1 0.5890 2.350 0.954 1320.00 1350.00 1840.0
2 2 0.1500 0.441 0.867 654.00 659.00 1150.0
3 3 0.0784 -0.220 0.815 382.00 371.00 807.0
4 4 0.4330 -0.559 0.843 246.00 227.00 607.0
5 5 0.6480 -0.752 0.906 169.00 146.00 476.0
6 6 0.7340 -0.883 0.920 122.00 98.70 385.0
7 7 0.8060 -0.970 0.940 91.20 68.80 318.0
8 8 0.8520 -1.020 0.957 70.10 49.00 267.0
9 9 0.8760 -1.060 0.965 55.10 35.80 228.0
10 10 0.8990 -1.090 0.977 44.20 26.70 198.0
11 12 0.9030 -1.170 0.973 29.70 15.60 154.0
12 15 0.9170 -1.240 0.982 17.90 7.56 113.0
13 18 0.9100 -1.330 0.974 11.60 4.03 86.9
14 21 0.9150 -1.360 0.973 7.92 2.35 68.7
Plotting resutls from the network topology analysis...
Done.
[ NOTE, FIGURE ]: Red horizontal line indicates a signed R^2 of 0.9
Setting soft-thresholding power to: 8.
Power-transforming the gene-gene similarity matrix...Done.
---------------------------------------------------
4. Convert into topological overlap matrix (dissTOM)
---------------------------------------------------
Creating dissTOM...Done.
Performing hierarchical clustering on dissTOM...Done.
---------------------------------------------------
5. Identify modules (clusters)
---------------------------------------------------
Merging modules that have a correlation ≥ 0.9 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.
Module (cluster) size:
mergedColors
black blue brown cyan green greenyellow
299 389 354 79 327 111
lightcyan magenta midnightblue pink purple red
57 244 77 246 148 305
salmon tan turquoise yellow
80 101 653 349
Cutoff used: 0.9
Number of modules identified: 16
Calculating module-module similarity based
on module-eigengene-expression...Done.
Tidying module names...Done.
Plotting adjacency matrix for module-module similarity...
---------------------------------------------------
6. Tidy modules (clusters)
---------------------------------------------------
Show code
# saveRDS(
# mods,
# here::here(
# glue::glue(
# "result/tissue-comparison/ref_{ref.sample}timecourse-gcn.rds"
# )
# )
# )
Modify network plot
Internal function; use ::: to call
Show code
timecourseRnaseq::: plot_adj_as_network (
matrix = mods[["adj_matrix_ME" ]][["ME" ]],
# layout = igraph::layout.sugiyama, # default view
layout = igraph:: layout_in_circle, # changed
min_edge = 0.6 ,
node_label_size = 1.2 ,
node_size = 30 ,
edge_size = 3 ,
node_frame_col = "grey20" ,
node_fill_col = "grey80" ,
vertex.frame.width = 3
)
Visualizing a simplified representation of the circadian GCN
Annotate the network
Identify rhythmic modules
Show code
# Obtain a list of genes in each module
l_module_genes <- mods[["module_genes" ]] |>
arrange (module_identity) |>
group_split (module_identity) |>
purrr:: map (
~ .x |> pull (gene_name)
) |>
setNames (unique (mods[["module_genes" ]]$ module_identity))
# Load results of rhythmicity analyses
db_rhy <- purrr:: map (
sample.names,
~ load_rhy_genes (
sample = .x
)
) |>
setNames (sample.names)
# Set your p-value of choice
###- ### - ### - ### - ### - ### - ### - ### -
# col_pval = "BH.Q"
col_pval = "default.pvalue"
# col_pval = "raw.pvalue"
###- ### - ### - ### - ### - ### - ### - ### -
# Obtain a list of rhythmic genes in each tissue
l_rhy_genes <- purrr:: map (
sample.names,
~ db_rhy[[.x]] |>
purrr:: map (
~ .x |>
filter (
if_all (
all_of (col_pval),
~ .x < 0.05
)
) |>
filter (
ID %in% unlist (l_module_genes)
) |>
pull (1 ) |>
unique ()
) |>
purrr:: compact ()
) |>
setNames (sample.names)
Modules vs. Rhythmic genes
Show code
# LIST ONE - WGCNA modules
list1 <- l_module_genes
sapply (list1, length) |> print ()
C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13 C14 C15 C16
79 246 299 57 354 349 77 148 653 111 80 244 305 327 101 389
Show code
trash <- purrr:: map (
sample.names,
function (x) {
cat ("Tissue:" , x, " \n " )
## LIST TWO - rhythmic genes
list2 <- l_rhy_genes[[x]]
sapply (list2, length) |> print ()
## CHECK FOR OVERLAP
# define size of genome
size = length (unique (c (unlist (list1), unlist (list2))))
# make a GOM object
gom.1 v2 <- GeneOverlap:: newGOM (
list2,
list1,
genome.size = size
)
# GeneOverlap::drawHeatmap(
# gom.1v2,
# # adj.p = TRUE,
# # cutoff = 0.05,
# # what = "odds.ratio",
# # # what="Jaccard",
# # log.scale = T,
# # note.col = "black",
# grid.col = "Oranges"
# )
gom.1 v2
}
) |>
setNames (sample.names)
Tissue: agam-head
ARS empJTK GeneCycle JTK LS meta2d RAIN
2074 1978 1665 1719 1093 1636 2265
Tissue: agam-body
ARS empJTK GeneCycle JTK LS meta2d RAIN
1174 1606 1420 1333 607 1210 2151
Module preservation
Prep data
Show code
# tidydata.db |>
# purrr::map_int(
# ~ nrow(.x)
# )
# We work with two sets:
nSets = 2 ;
# Find the common set of genes across all samples
common_genes <- tidydata.db |>
purrr:: map (
~ .x |>
pull (gene_name) |>
unique ()
) |>
purrr:: reduce (
intersect
) |>
intersect (
mods[["module_genes" ]]$ gene_name
)
# filter to keep only genes used in creating REF network
samples.list <- tidydata.db |>
purrr:: map (
~ .x |>
filter (
gene_name %in% common_genes
)
)
test_samples <- setdiff (sample.names, ref.sample)
# OPTION 1: Read previous run from memory
l_mp <- readRDS (
here:: here (
glue:: glue (
"result/tissue-comparison/ref_{ref.sample}_module-preservation.rds"
)
)
)
# # OPTION 2: Run module preservation for each tissue pair (Takes time)
# l_mp <- purrr::map(
# test_samples,
# function (x) {
# multiExpr <- prep_data_module_preservation(
# data = samples.list,
# ref = ref.sample,
# test = x
# )
#
# # CHECKS
# #---#---#---#---#---#---#---#---#---#---#---#---
# # Check that the data has the correct format for many functions
# # operating on multiple sets:
# exprSize = WGCNA::checkSets(multiExpr)
# # Check that all genes and samples have sufficiently low numbers of
# # missing values.
# gsg = WGCNA::goodSamplesGenesMS(multiExpr, verbose = 1);
# cat("All samples okay?\n", gsg$allOK, "\n")
# #---#---#---#---#---#---#---#---#---#---#---#---
#
# # prep data
# #---#---#---#---#---#---#---#---#---#---#---#---
# # Expression data
# multiExpr_1 = list(
# ref = list(data = multiExpr[[1]]$data),
# test = list(data = multiExpr[[2]]$data)
# )
# ## filter to keep only the genes that we are working with
# mod.identity <- mods[["module_genes"]] |>
# select(
# gene_name,
# old_labels
# ) |>
# filter(
# gene_name %in% common_genes
# ) |>
# # !!this step is necessary!! #
# arrange(gene_name)
# ## specify the module identity of the genes
# moduleColors <- mod.identity |> pull(old_labels)
# multiColor = list(ref = moduleColors)
#
# # Run module preservation
# #---#---#---#---#---#---#---#---#---#---#---#---
# mp = WGCNA::modulePreservation(
# multiExpr_1,
# multiColor,
# referenceNetworks = 1,
# nPermutations = 100,
# calculateQvalue = FALSE,
# quickCor = 0,
# verbose = 1
# )
#
# mp
# }
# )
#
# saveRDS(
# l_mp,
# here::here(
# glue::glue(
# "result/tissue-comparison/ref_{ref.sample}_module-preservation.rds"
# )
# )
# )
Show code
l_zsummary <- purrr:: map2 (
l_mp,
test_samples,
function (mp, y) {
ref = 1
test = 2
statsObs = cbind (
mp$ quality$ observed[[ref]][[test]],
mp$ preservation$ observed[[ref]][[test]]
)
statsZ = cbind (
mp$ quality$ Z[[ref]][[test]][,- 1 ],
mp$ preservation$ Z[[ref]][[test]][,- 1 ]
)
# Compare preservation to quality:
z.stats <- cbind (
statsObs[, c ("moduleSize" , "medianRank.pres" , "medianRank.qual" )],
signif (statsZ[, c ("Zsummary.pres" , "Zsummary.qual" )], 2 )
)
mains = c (
"Preservation Median rank" ,
"Preservation Zsummary"
)
pd <- position_dodge (0.5 )
# Plot
out <- z.stats |>
tibble:: rownames_to_column ("old_labels" ) |>
left_join (
distinct (mods[["module_genes" ]][- 1 ]),
by= "old_labels"
) |>
select (
module_name = module_identity,
module_size = moduleSize,
everything ()
) |>
filter (
! is.na (module_name)
) |>
mutate (
module_name = factor (
module_name,
levels = unique (mods[["module_genes" ]]$ module_identity)
)
)
## PLOT PRESERVATION Z-SUMMARY
p <- out |>
ggplot (aes (x= log10 (module_size), y= Zsummary.pres)) +
geom_hline (yintercept = c (2 ,10 ), col= "darkred" , alpha= 0.5 ) +
geom_point (
alpha= 0.5 ,
size= 20 ,
col= "lightgrey" ,
position = pd
) +
geom_text (
aes (label= module_name),
check_overlap = TRUE ,
size = 5
) +
theme_bw (20 ) +
scale_x_continuous (limits = c (0 ,max (log10 (1000 ))+ 0.5 ),
breaks = c (0 ,1 ,2 ,3 ),
labels = c ("0" ,"10" ,"100" ,"1000" )) +
xlab ("module size (genes)" ) +
ylab (mains[2 ]) +
ggtitle (paste0 ("Test: " , y))
print (p)
out
}
) |>
setNames (
test_samples
)
Summary
Show code
z_summ <- purrr:: map_dfr (
test_samples,
~ l_zsummary[[.x]] |>
select (
modules = module_name,
zsumm = Zsummary.pres
) |>
mutate (
# ref = which.sample,
tissue = .x,
.before = 1
)
) |>
as_tibble ()
z_summ |>
filter (
zsumm > 2
) |>
arrange (
desc (zsumm)
) |>
head (6 ) |>
tidyr:: pivot_wider (
names_from = modules,
values_from = zsumm
) |>
mutate (
across (
! tissue,
~ if_else (
is.na (.x),
"-" ,
round (.x, 2 ) |> as.character ()
)
)
) |>
select (
tissue,
any_of (
paste0 ("C" , 1 : 20 )
)
) |>
kableExtra:: kable ()
agam-body
5.1
8.8
7
4.2
5
3.9
Network statistics
From WGCNA-tutorial
Intramodular connectivity
“We begin by calculating the intramodular connectivity for each gene. (In network literature, connectivity is often referred to as”degree”.) The function intramodularConnectivity computes:
the whole network connectivity kTotal ,
the within (intra)module connectivity kWithin ,
the extra-modular connectivity kOut =kTotal-kWithin, and
the difference of the intra- and extra-modular connectivities kDiff = kIn - kOut = 2*kIN-kTotal
Show code
# From what I can tell, colorh1 in the tutorial refers to moduleColors
colorh1 <- mods[["modules" ]]$ colors
adj_matrix <- mods[["adj_matrix" ]]
# Calculate the connectivities of the genes
Alldegrees1 = WGCNA:: intramodularConnectivity (
adjMat = adj_matrix,
colors = colorh1
) |>
mutate (
gene_name = rownames (adj_matrix),
across (
matches ("^k" ),
~ round (.x, 2 )
)
)
Plotting the mean (± 95% CI) connectivity of genes in different modules
Show code
pd <- position_dodge (0.1 )
# which_var <- "kTotal"
which_var <- c ("kTotal" , "kWithin" , "kOut" , "kDiff" )
Alldegrees1 |>
# rownames_to_column("gene_name") %>%
left_join (
mods[["module_genes" ]],
join_by (gene_name)
) |>
# PLOT FROM RAW DATA
mutate (
module_identity = factor (
module_identity,
levels = paste0 (
"C" ,
sort (
unique (mods[["module_genes" ]]$ module_identity) |>
stringr:: str_replace ("C" , "" ) |>
as.integer ()
)
) |> rev ()
)
) |>
summarySE (
measurevar = which_var,
groupvars = "module_identity"
) |>
mutate (
type = factor (
type,
levels = which_var
)
) |>
# Plot
ggplot (
aes (
y = module_identity,
x = mean,
group = interaction (module_identity, type)
)
) +
geom_vline (xintercept = 0 , col = "maroon" , alpha = 0.7 , lwd = 1.2 ) +
labs (
y = "" ,
x = glue:: glue (
"Connectivity"
),
title = ""
) +
## Add error bar here
geom_errorbar (
aes (xmin = mean- ci, xmax = mean+ ci),
width = 0.3 ,
position= pd,
lwd = 1.3 ,
col= "black" ,
alpha = 1
) +
# Add the points on top of the error bars
geom_point (
position = pd,
size = 3 ,
col = "black" ,
fill = "orange" ,
show.legend = F,
pch = 21 ,
alpha = 0.9
) +
facet_wrap (
~ type,
nrow = 2 ,
scales = "free_x"
) +
scale_x_continuous (
n.breaks = 4
) +
theme_bw (25 ) +
# scale_color_manual(values=c("#F20505","#F5D736","#0FBF67")) +
theme (
legend.position = "none"
)