Compare time-course gene expression data across tissues

Last updated on 2025-10-30.

Show code

library(dplyr)
library(dbplyr)
library(ggplot2)
for (i in list.files(here::here("R"), full.names = TRUE)) {
  source(i)
}

# SAMPLE NAME
## specify sample name
sample.names <- c(
  # dmel
  # "dmel-head",
  # "dmel-body",
  # "dmel-heart"
  # mmus
  "mmus-brain_stem",
  "mmus-cerebellum",
  "mmus-hypothalamus",
  "mmus-heart"
  # # panu
  # "panu-brain_stem",
  # "panu-cerebellum",
  # "panu-hypothalamus",
  # "panu-heart",
  # "panu-scn"
)
# sample.cycles <- c("LD", "DD")

## SPECIFY THE DATASET TO BUILD GCN WITH
ref.sample <- "mmus-brain_stem"

writeLines(
  glue::glue("Reference tissue: {ref.sample}")
)

Reference tissue: mmus-brain_stem

Prep data

Expected format: one row per gene, one col per sample

Structure of reference tissue data:

ONE-SHOT (make_modules)

Make modules in one call

Show code

mods <- timecourseRnaseq::make_modules(
  data = tidydata.db[[ref.sample]],
  log2 = TRUE,
  id_column = "gene_name",
  min_expression = NULL,          # automatically estimated
  min_timepoints = NULL,          # automatically estimated
  method = "wgcna",
  qc = TRUE,
  sim_method = "kendall",
  soft_power = NULL,              # automatically estimated
  min_module_size = 50,
  max_modules = 16,
  merge_cutoff_similarity = 0.9,
  plot_network = FALSE,
  # plot_network_min_edge = 0.5,  # used when `plot_network == TRUE`
  tidy_modules = TRUE
  )

---------------------------------------------------
1. Log2-transform and subset 
---------------------------------------------------
Applying log2-transformation...Done.
Estimated min_expression = 8 
Estimated min_timepoints = 6 
Subsetting data...Done.
[ NOTE ]: After subsetting, 8893 of 17406 rows remain.

 Flagging genes and samples with too many missing values...
  ..step 1
All okay!Visualizing the log-transformed data
---------------------------------------------------
2. Calculate similarity of expression 
---------------------------------------------------
Running gene-gene similarity...Done.


---------------------------------------------------
3. Create adjacency matrix 
---------------------------------------------------
Performing network topology analysis to pick
  soft-thresholding power...
   Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
1      1   0.7240  1.9500          0.654    4510    4900.0   6020
2      2   0.5090  0.5730          0.389    2970    3230.0   4790
3      3   0.0298  0.0544          0.363    2170    2280.0   4040
4      4   0.2950 -0.1720          0.710    1690    1680.0   3510
5      5   0.6060 -0.3070          0.785    1360    1280.0   3110
6      6   0.7420 -0.3970          0.817    1130     995.0   2800
7      7   0.8110 -0.4740          0.818     951     790.0   2540
8      8   0.8540 -0.5280          0.841     815     634.0   2330
9      9   0.8860 -0.5790          0.866     708     516.0   2140
10    10   0.8960 -0.6210          0.868     621     424.0   1980
11    12   0.9040 -0.6890          0.877     489     294.0   1730
12    15   0.9040 -0.7770          0.886     359     182.0   1440
13    18   0.8990 -0.8420          0.895     275     118.0   1220
14    21   0.8930 -0.8910          0.899     216      80.2   1050

Plotting resutls from the network topology analysis...

Done.
[ NOTE, FIGURE ]: Red horizontal line indicates a signed R^2 of 0.9

Setting soft-thresholding power to: 8.
Power-transforming the gene-gene similarity matrix...Done.

---------------------------------------------------
4. Convert into topological overlap matrix (dissTOM) 
---------------------------------------------------
Creating dissTOM...Done.
Performing hierarchical clustering on dissTOM...Done.

---------------------------------------------------
5. Identify modules (clusters) 
---------------------------------------------------
Merging modules that have a correlation ≥ 0.9 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
 antiquewhite4        bisque4           blue         coral1         coral2 
           215            669           1938             60            287 
          cyan    darkmagenta darkolivegreen    darkorange2        darkred 
           337            117            435             96            157 
 darkseagreen4  darkslateblue    floralwhite         grey60      honeydew1 
            61             87             97            172            388 
         ivory lavenderblush3      lightcyan     lightcyan1     lightgreen 
            98            155            174             98            165 
       magenta   mediumorchid  mediumpurple3   navajowhite2         orange 
           202             55            105             76            154 
    orangered4  paleturquoise palevioletred3           pink          plum1 
           108            123            301            623            110 
        purple            red         salmon        skyblue       skyblue2 
           199            234            186            127             54 
     steelblue          white        yellow4    yellowgreen 
           123            146             50            111 
Merging modules that have a correlation ≥ 0.85 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
       bisque4         coral1       darkgrey    darkmagenta    darkorange2 
          1397            440           3195            117            194 
       darkred  darkseagreen4         grey60          ivory        magenta 
           157             61            172            672            325 
  mediumorchid  mediumpurple3   navajowhite2         orange palevioletred3 
           163            105             76            154            736 
         plum1    saddlebrown        skyblue       skyblue2          white 
           110            287            282             54            146 
       yellow4 
            50 
Merging modules that have a correlation ≥ 0.8 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
      bisque4          blue        coral1   darkmagenta   darkorange2 
         2462          3195           440           796           194 
darkseagreen4         ivory       magenta  mediumorchid mediumpurple3 
           61           748           325           163           105 
       orange      skyblue2         white       yellow4 
          154            54           146            50 

Cutoff used: 0.8 
Number of modules identified: 14 

Calculating module-module similarity based
  on module-eigengene-expression...Done.
Tidying module names...Done.
Plotting adjacency matrix for module-module similarity...

---------------------------------------------------
6. Tidy modules (clusters) 
---------------------------------------------------

Modify network plot

Internal function; use ::: to call

Show code

timecourseRnaseq:::plot_adj_as_network(
  matrix = mods[["adj_matrix_ME"]][["ME"]],
  # layout = igraph::layout.sugiyama,         # default view
  layout = igraph::layout_in_circle,          # changed
  min_edge = 0.6,
  node_label_size = 1.2,
  node_size = 30,
  edge_size = 3,
  node_frame_col = "grey20",
  node_fill_col = "grey80",
  vertex.frame.width = 3
)

Visualizing a simplified representation of the circadian GCN

Annotate the network

Identify rhythmic modules

Show code

# Obtain a list of genes in each module
l_module_genes <- mods[["module_genes"]] |> 
  arrange(module_identity) |> 
  group_split(module_identity) |> 
  purrr::map(
    ~ .x |> pull(gene_name)
  ) |> 
  setNames(unique(mods[["module_genes"]]$module_identity))

# Load results of rhythmicity analyses
db_rhy <- purrr::map(
  sample.names,
  ~ load_rhy_genes(
      sample = .x
    )
) |> 
  setNames(sample.names)

# Set your p-value of choice
###-###-###-###-###-###-###-###-
# col_pval = "BH.Q"
col_pval = "default.pvalue"
# col_pval = "raw.pvalue"
###-###-###-###-###-###-###-###-

# Obtain a list of rhythmic genes in each tissue
l_rhy_genes <- purrr::map(
  sample.names,
  ~ db_rhy[[.x]] |> 
    purrr::map(
      ~ .x |> 
        filter(
          if_all(
            all_of(col_pval),
            ~ .x < 0.05
          )
        ) |> 
        filter(
          ID %in% unlist(l_module_genes)
        ) |> 
        pull(1) |> 
        unique()
    ) |> 
    purrr::compact()
) |> 
  setNames(sample.names)

Modules vs. Rhythmic genes

Note

Show code

# LIST ONE - WGCNA modules
list1 <- l_module_genes
sapply(list1, length) |> print()

  C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13  C14 
 154  163 3195  105  748  146   54  796 2462  440  194   61   50  325

Show code

trash <- purrr::map(
  sample.names,
  function(x) {
    cat("Tissue:", x, "\n")
    
    ## LIST TWO - rhythmic genes
    list2 <- l_rhy_genes[[x]]
    sapply(list2, length) |> print()
    
    ## CHECK FOR OVERLAP
    # define size of genome
    size = length(unique(c(unlist(list1), unlist(list2))))
    # make a GOM object
    gom.1v2 <- GeneOverlap::newGOM(
      list2, 
      list1,
      genome.size = size
    )
    GeneOverlap::drawHeatmap(
      gom.1v2,
      adj.p = TRUE,
      cutoff = 0.05,
      what = "odds.ratio",
      # what="Jaccard",
      log.scale = T,
      note.col = "black",
      grid.col = "Oranges"
    )
    
    gom.1v2
  }
) |> 
  setNames(sample.names)

Tissue: mmus-brain_stem 
      ARS    empJTK GeneCycle       JTK    meta2d      RAIN 
     1010       361       368       198       783       687

Tissue: mmus-cerebellum 
      ARS    empJTK GeneCycle       JTK    meta2d      RAIN 
     2663       701       641       241      1952       593

Tissue: mmus-hypothalamus 
      ARS    empJTK GeneCycle       JTK    meta2d      RAIN 
     2042       559       481       223      1294       609

Tissue: mmus-heart 
      ARS    empJTK GeneCycle       JTK    meta2d      RAIN 
     2550       987       785       532      2251      1316

Summary figure

Show code

odds_ratio <- purrr::map_dfr(
  sample.names,
  ~ trash[[.x]] |> 
    GeneOverlap::getMatrix(
      "odds.ratio"
    ) |> 
    as.data.frame() |> 
    tibble::rownames_to_column("test") |> 
    tidyr::pivot_longer(
      cols = !test,
      names_to = "modules",
      values_to = "odds.ratio"
    ) |> 
    mutate(
      tissue = .x,
      .before = 1
    )
)

pvalues <- purrr::map_dfr(
  sample.names,
  ~ trash[[.x]] |> 
    GeneOverlap::getMatrix(
      "pval"
    ) |> 
    as.data.frame() |> 
    tibble::rownames_to_column("test") |> 
    tidyr::pivot_longer(
      cols = !test,
      names_to = "modules",
      values_to = "pval"
    ) |> 
    mutate(
      tissue = .x,
      .before = 1
    )
)

rhy_genes <- purrr::map_dfr(
  sample.names,
  function (x) {
    algo <- names(l_rhy_genes[[x]])
    tibble(
      tissue = x,
      test = algo,
      n_rhy_genes = sapply(l_rhy_genes[[x]], length)
    )
  }
)

left_join(
  odds_ratio,
  pvalues,
  join_by(test, modules, tissue)
) |> 
  left_join(
    rhy_genes,
    join_by(tissue, test)
  ) |> 
  # clean data
  filter(
    n_rhy_genes > 40
  ) |> 
  # check significance
  mutate(
    sig = if_else(
      pval < 0.05,
      TRUE,
      FALSE
    )
  ) |> 
  # STEP ONE
  group_by(tissue, modules) |> 
  reframe(
    prop_sig = sum(sig) / n(),
    tot_algo = n()
  ) |> 
  # STEP TWO
  filter(
    tot_algo > 3 &
      prop_sig > 3/4
  ) |> 
  # TRANSFORM / VISUALIZE
  select(- tot_algo) |> 
  tidyr::pivot_wider(
    names_from = modules,
    values_from = prop_sig
  ) |> 
  mutate(
    across(
      !tissue,
      ~ if_else(
        is.na(.x), 
        "-", 
        paste0(
          round(.x, 2)*100,
          "%"
        )
      )
    )
  ) |> 
  select(
    tissue,
    any_of(
      paste0("C", 1:20)
    )
  ) |> 
  kableExtra::kable()

tissue	C1	C5	C8	C11	C12	C13
mmus-brain_stem	100%	100%	100%	83%	100%	83%
mmus-cerebellum	83%	-	-	-	-	83%
mmus-hypothalamus	83%	83%	-	-	-	-

Module preservation

Prep data

Show code

# tidydata.db |> 
#   purrr::map_int(
#     ~ nrow(.x)
#   )

# We work with two sets:
nSets = 2;

# Find the common set of genes across all samples
common_genes <- tidydata.db |> 
  purrr::map(
    ~ .x |> 
      pull(gene_name) |> 
      unique()
  ) |> 
  purrr::reduce(
    intersect
  ) |> 
  intersect(
    mods[["module_genes"]]$gene_name
  ) 

# filter to keep only genes used in creating REF network
samples.list <- tidydata.db |> 
  purrr::map(
    ~ .x |> 
      filter(
        gene_name %in% common_genes
      )
  )

test_samples <- setdiff(sample.names, ref.sample)

# OPTION 1: Read previous run from memory 
l_mp <- readRDS(
  here::here(
    glue::glue(
      "result/tissue-comparison/ref_{ref.sample}_module-preservation.rds"
    )
  )
)

# # OPTION 2: Run module preservation for each tissue pair (Takes time)
# l_mp <- purrr::map(
#   test_samples,
#   function (x) {
#     multiExpr <- prep_data_module_preservation(
#       data = samples.list,
#       ref = ref.sample, 
#       test = x
#     )
#     
#     # CHECKS
#     #---#---#---#---#---#---#---#---#---#---#---#---
#     # Check that the data has the correct format for many functions 
#     # operating on multiple sets:
#     exprSize = WGCNA::checkSets(multiExpr)
#     # Check that all genes and samples have sufficiently low numbers of 
#     # missing values.
#     gsg = WGCNA::goodSamplesGenesMS(multiExpr, verbose = 1);
#     cat("All samples okay?\n", gsg$allOK, "\n")
#     #---#---#---#---#---#---#---#---#---#---#---#---
#     
#     # prep data
#     #---#---#---#---#---#---#---#---#---#---#---#---
#     # Expression data
#     multiExpr_1 = list(
#       ref = list(data = multiExpr[[1]]$data), 
#       test = list(data = multiExpr[[2]]$data)
#     )
#     ## filter to keep only the genes that we are working with
#     mod.identity <- mods[["module_genes"]] |>
#       select(
#         gene_name,
#         old_labels
#       ) |> 
#       filter(
#         gene_name %in% common_genes
#       ) |> 
#       # !!this step is necessary!! #
#       arrange(gene_name)
#     ## specify the module identity of the genes
#     moduleColors <- mod.identity |>  pull(old_labels)
#     multiColor = list(ref = moduleColors)
#     
#     # Run module preservation
#     #---#---#---#---#---#---#---#---#---#---#---#---
#     mp = WGCNA::modulePreservation(
#       multiExpr_1, 
#       multiColor,
#       referenceNetworks = 1,
#       nPermutations = 100,
#       calculateQvalue = FALSE,
#       quickCor = 0,
#       verbose = 1
#     )
#     
#     mp
#   }
# )

# saveRDS(
#   l_mp,
#   here::here(
#     glue::glue(
#       "result/tissue-comparison/ref_{ref.sample}_module-preservation.rds"
#     )
#   )
# )

Plot results

Show code

l_zsummary <- purrr::map2(
  l_mp,
  test_samples,
  function (mp, y) {
    ref = 1
    test = 2
    statsObs = cbind(
      mp$quality$observed[[ref]][[test]], 
      mp$preservation$observed[[ref]][[test]]
    )
    statsZ = cbind(
      mp$quality$Z[[ref]][[test]][,-1], 
      mp$preservation$Z[[ref]][[test]][,-1]
    )
    
    # Compare preservation to quality:
    z.stats <- cbind(
      statsObs[, c("moduleSize", "medianRank.pres", "medianRank.qual")],
      signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)
    )
    
    mains = c(
      "Preservation Median rank", 
      "Preservation Zsummary"
    )
    pd <- position_dodge(0.5)
    
    # Plot
    out <- z.stats |> 
      tibble::rownames_to_column("old_labels") |> 
      left_join(
        distinct(mods[["module_genes"]][-1]), 
        by="old_labels"
      ) |> 
      select(
        module_name = module_identity,
        module_size = moduleSize,
        everything()
      ) |> 
      filter(
        !is.na(module_name)
      ) |>
      mutate(
        module_name = factor(
          module_name, 
          levels = unique(mods[["module_genes"]]$module_identity)
        )
      )
    
    ## PLOT PRESERVATION Z-SUMMARY
    p <- out |> 
      ggplot(aes(x=log10(module_size), y=Zsummary.pres)) +
      geom_hline(yintercept = c(2,10), col="darkred", alpha=0.5) +
      geom_point(
        alpha=0.5, 
        size=20, 
        col="lightgrey",
        position = pd
      ) +
      geom_text(
        aes(label=module_name),
        check_overlap = TRUE,
        size = 5
      ) +
      theme_bw(20) +
      scale_x_continuous(limits = c(0,max(log10(1000))+0.5),
                         breaks = c(0,1,2,3),
                         labels = c("0","10","100","1000")) +
      xlab("module size (genes)") +
      ylab(mains[2]) +
      ggtitle(paste0("Test: ", y))
    
    print(p)

    out
  }
) |> 
  setNames(
    test_samples
  )

Summary

Show code

purrr::map_dfr(
  test_samples,
  ~ l_zsummary[[.x]] |> 
    select(
      modules = module_name,
      zsumm = Zsummary.pres
    ) |> 
    mutate(
      # ref = which.sample,
      tissue = .x,
      .before = 1
    )
) |> 
  as_tibble() |> 
  filter(
    zsumm > 10
  ) |> 
  tidyr::pivot_wider(
    names_from = modules,
    values_from = zsumm
  ) |> 
  mutate(
    across(
      !tissue,
      ~ if_else(
        is.na(.x), 
        "-", 
        round(.x, 2) |> as.character()
      )
    )
  ) |> 
  select(
    tissue,
    any_of(
      paste0("C", 1:20)
    )
  ) |> 
  kableExtra::kable()

tissue	C3	C4	C5	C11	C13
mmus-cerebellum	-	18	-	18	-
mmus-hypothalamus	12	20	12	23	15

Network statistics

From WGCNA-tutorial

Intramodular connectivity

“We begin by calculating the intramodular connectivity for each gene. (In network literature, connectivity is often referred to as”degree”.) The function intramodularConnectivity computes:

the whole network connectivity kTotal,
the within (intra)module connectivity kWithin,
the extra-modular connectivity kOut=kTotal-kWithin, and
the difference of the intra- and extra-modular connectivities kDiff = kIn - kOut = 2*kIN-kTotal

Show code

# From what I can tell, colorh1 in the tutorial refers to moduleColors
colorh1 <- mods[["modules"]]$colors
adj_matrix <- mods[["adj_matrix"]]

# Calculate the connectivities of the genes
Alldegrees1 = WGCNA::intramodularConnectivity(
  adjMat = adj_matrix,
  colors = colorh1
) |>
  mutate(
    gene_name = rownames(adj_matrix),
    across(
      matches("^k"),
      ~ round(.x, 2)
    )
  )

Plotting the mean (± 95% CI) connectivity of genes in different modules

Show code

pd <- position_dodge(0.1)

# which_var <- "kTotal"
which_var <- c("kTotal", "kWithin", "kOut", "kDiff")

Alldegrees1 |>
  # rownames_to_column("gene_name") %>%
  left_join(
    mods[["module_genes"]],
    join_by(gene_name)
  ) |>
  # PLOT FROM RAW DATA
  mutate(
    module_identity = factor(
      module_identity,
      levels = paste0(
        "C",
        sort(
          unique(mods[["module_genes"]]$module_identity) |>
            stringr::str_replace("C", "") |>
            as.integer()
        )
      ) |> rev()
    )
  ) |>
  summarySE(
    measurevar = which_var,
    groupvars = "module_identity"
  ) |>
  mutate(
    type = factor(
      type,
      levels = which_var
    )
  ) |>
  # Plot
  ggplot(
    aes(
      y = module_identity,
      x = mean,
      group = interaction(module_identity, type)
    )
  ) +
  geom_vline(xintercept = 0, col = "maroon", alpha = 0.7, lwd =1.2) +
  labs(
    y = "",
    x = glue::glue(
      "Connectivity"
    ),
    title = ""
  ) +
  ## Add error bar here
  geom_errorbar(
    aes(xmin = mean-ci, xmax = mean+ci),
    width = 0.3,
    position=pd,
    lwd = 1.3,
    col="black",
    alpha = 1
  ) +
  # Add the points on top of the error bars
  geom_point(
    position = pd,
    size = 3,
    col = "black",
    fill = "orange",
    show.legend = F,
    pch = 21,
    alpha = 0.9
  ) +
  facet_wrap(
    ~ type,
    nrow = 2,
    scales = "free_x"
  ) +
  scale_x_continuous(
    n.breaks = 4
  ) +
  theme_bw(25) +
  # scale_color_manual(values=c("#F20505","#F5D736","#0FBF67")) +
  theme(
    legend.position = "none"
  )