Compare time-course gene expression data across tissues

Last updated on 2026-04-29.

Show code

library(dplyr)
library(dbplyr)
library(ggplot2)
for (i in list.files(here::here("R"), full.names = TRUE)) {
  source(i)
}

# SAMPLE NAME
## specify sample name
sample.names <- c(
  # # dmel
  # "dmel-head",
  # "dmel-body",
  # "dmel-heart"
  # # mmus
  # "mmus-brain_stem",
  # "mmus-cerebellum",
  # "mmus-hypothalamus",
  # "mmus-heart"
  # panu
  "panu-brain_stem",
  "panu-cerebellum",
  "panu-hypothalamus",
  "panu-heart",
  "panu-scn"
)
# sample.cycles <- c("LD", "DD")

## SPECIFY THE DATASET TO BUILD GCN WITH
ref.sample <- "panu-brain_stem"

writeLines(
  glue::glue("Reference tissue: {ref.sample}")
)

Reference tissue: panu-brain_stem

Prep data

Expected format: one row per gene, one col per sample

Structure of reference tissue data:

ONE-SHOT (make_modules)

Make modules in one call

Show code

mods <- timecourseRnaseq::make_modules(
  data = tidydata.db[[ref.sample]],
  log2 = TRUE,
  id_column = "gene_name",
  min_expression = NULL,          # automatically estimated
  min_timepoints = NULL,          # automatically estimated
  method = "wgcna",
  qc = TRUE,
  sim_method = "kendall",
  soft_power = 15,              # automatically estimated
  min_module_size = 50,
  max_modules = 16,
  merge_cutoff_similarity = 0.9,
  plot_network = FALSE,
  # plot_network_min_edge = 0.5,  # used when `plot_network == TRUE`
  tidy_modules = TRUE
  )

---------------------------------------------------
1. Log2-transform and subset 
---------------------------------------------------
Applying log2-transformation...Done.
Estimated min_expression = 3 
Estimated min_timepoints = 8 
Subsetting data...Done.
[ NOTE ]: After subsetting, 4943 of 15745 rows remain.

 Flagging genes and samples with too many missing values...
  ..step 1
All okay!Visualizing the log-transformed data
---------------------------------------------------
2. Calculate similarity of expression 
---------------------------------------------------
Running gene-gene similarity...Done.


---------------------------------------------------
3. Create adjacency matrix 
---------------------------------------------------
Performing network topology analysis to pick
  soft-thresholding power...
   Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
1      1    0.779  1.860          0.978  2000.0   2020.00   2800
2      2    0.325  0.364          0.941  1110.0   1080.00   1960
3      3    0.144 -0.165          0.854   711.0    652.00   1510
4      4    0.619 -0.462          0.919   492.0    423.00   1220
5      5    0.747 -0.650          0.904   360.0    290.00   1020
6      6    0.829 -0.789          0.923   273.0    207.00    871
7      7    0.852 -0.896          0.923   213.0    153.00    757
8      8    0.850 -0.994          0.906   171.0    115.00    668
9      9    0.841 -1.070          0.892   139.0     87.60    595
10    10    0.846 -1.130          0.893   115.0     68.10    535
11    12    0.883 -1.200          0.920    81.6     42.40    441
12    15    0.904 -1.290          0.937    52.4     23.20    343
13    18    0.885 -1.360          0.925    35.7     13.40    276
14    21    0.889 -1.390          0.933    25.5      8.13    227

Plotting resutls from the network topology analysis...

Done.
[ NOTE, FIGURE ]: Red horizontal line indicates a signed R^2 of 0.9

Setting soft-thresholding power to: 15.
Power-transforming the gene-gene similarity matrix...Done.

---------------------------------------------------
4. Convert into topological overlap matrix (dissTOM) 
---------------------------------------------------
Creating dissTOM...Done.
Performing hierarchical clustering on dissTOM...Done.

---------------------------------------------------
5. Identify modules (clusters) 
---------------------------------------------------
Merging modules that have a correlation ≥ 0.9 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
        black          cyan     darkgreen      darkgrey    darkorange 
         1392           158            88            81            78 
      darkred darkturquoise   greenyellow        grey60    lightgreen 
          105            87           204           133           127 
  lightyellow       magenta  midnightblue        orange paleturquoise 
          127           211           150            80            56 
         pink        purple           red     royalblue   saddlebrown 
          212           204           234           124            59 
      skyblue     steelblue           tan     turquoise        violet 
           72            57           198           320            54 
        white        yellow 
           77           255 
Merging modules that have a correlation ≥ 0.85 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
        black          cyan     darkgreen      darkgrey       darkred 
         1392           236            88           605           105 
darkturquoise        grey60    lightgreen       magenta        orange 
          214           283           438           211            80 
paleturquoise          pink     royalblue   saddlebrown       skyblue 
           56           416           124            59            72 
    steelblue           tan        violet        yellow 
           57           198            54           255 
Merging modules that have a correlation ≥ 0.8 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
        black     darkgreen      darkgrey       darkred darkturquoise 
         1883            88           605           105           214 
   lightgreen       magenta        orange     royalblue   saddlebrown 
         1108           211            80           407            59 
      skyblue     steelblue        violet 
           72            57            54 

Cutoff used: 0.8 
Number of modules identified: 13 

Calculating module-module similarity based
  on module-eigengene-expression...Done.
Tidying module names...Done.
Plotting adjacency matrix for module-module similarity...

---------------------------------------------------
6. Tidy modules (clusters) 
---------------------------------------------------

Show code

# saveRDS(
#   mods,
#   here::here(
#     glue::glue(
#       "result/tissue-comparison/ref_{ref.sample}timecourse-gcn.rds"
#     )
#   )
# )

Modify network plot

Internal function; use ::: to call

Show code

timecourseRnaseq:::plot_adj_as_network(
  matrix = mods[["adj_matrix_ME"]][["ME"]],
  # layout = igraph::layout.sugiyama,         # default view
  layout = igraph::layout_in_circle,          # changed
  min_edge = 0.6,
  node_label_size = 1.2,
  node_size = 30,
  edge_size = 3,
  node_frame_col = "grey20",
  node_fill_col = "grey80",
  vertex.frame.width = 3
)

Visualizing a simplified representation of the circadian GCN

Annotate the network

Identify rhythmic modules

Show code

# Obtain a list of genes in each module
l_module_genes <- mods[["module_genes"]] |> 
  arrange(module_identity) |> 
  group_split(module_identity) |> 
  purrr::map(
    ~ .x |> pull(gene_name)
  ) |> 
  setNames(unique(mods[["module_genes"]]$module_identity))

# Load results of rhythmicity analyses
db_rhy <- purrr::map(
  sample.names,
  ~ load_rhy_genes(
      sample = .x
    )
) |> 
  setNames(sample.names)

# Set your p-value of choice
###-###-###-###-###-###-###-###-
# col_pval = "BH.Q"
col_pval = "default.pvalue"
# col_pval = "raw.pvalue"
###-###-###-###-###-###-###-###-

# Obtain a list of rhythmic genes in each tissue
l_rhy_genes <- purrr::map(
  sample.names,
  ~ db_rhy[[.x]] |> 
    purrr::map(
      ~ .x |> 
        filter(
          if_all(
            all_of(col_pval),
            ~ .x < 0.05
          )
        ) |> 
        filter(
          ID %in% unlist(l_module_genes)
        ) |> 
        pull(1) |> 
        unique()
    ) |> 
    purrr::compact()
) |> 
  setNames(sample.names)

Modules vs. Rhythmic genes

Note

Show code

# LIST ONE - WGCNA modules
list1 <- l_module_genes
sapply(list1, length) |> print()

  C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13 
1108  214   59   88   57  211   80  605  105  407   72   54 1883

Show code

trash <- purrr::map(
  sample.names,
  function(x) {
    cat("Tissue:", x, "\n")
    
    ## LIST TWO - rhythmic genes
    list2 <- l_rhy_genes[[x]]
    sapply(list2, length) |> print()
    
    ## CHECK FOR OVERLAP
    # define size of genome
    size = length(unique(c(unlist(list1), unlist(list2))))
    # make a GOM object
    gom.1v2 <- GeneOverlap::newGOM(
      list2, 
      list1,
      genome.size = size
    )
    GeneOverlap::drawHeatmap(
      gom.1v2,
      adj.p = TRUE,
      cutoff = 0.05,
      what = "odds.ratio",
      # what="Jaccard",
      log.scale = T,
      note.col = "black",
      grid.col = "Oranges"
    )
    
    gom.1v2
  }
) |> 
  setNames(sample.names)

Tissue: panu-brain_stem 
      ARS    empJTK GeneCycle       JTK        LS    meta2d      RAIN 
      930       565       958       568         3      1043      1147

Tissue: panu-cerebellum 
      ARS    empJTK GeneCycle       JTK        LS    meta2d      RAIN 
       77       355       359       116         1       122       493

Tissue: panu-hypothalamus 
      ARS    empJTK GeneCycle       JTK    meta2d      RAIN 
      779      1467      1496       497       812      1917

Tissue: panu-heart 
      ARS    empJTK GeneCycle       JTK        LS    meta2d      RAIN 
      300      1809      1138       846        31      1155      2768

Tissue: panu-scn 
      ARS    empJTK GeneCycle       JTK        LS    meta2d      RAIN 
      170       323       510       175         1       269       763

Summary figure

Show code

odds_ratio <- purrr::map_dfr(
  sample.names,
  ~ trash[[.x]] |> 
    GeneOverlap::getMatrix(
      "odds.ratio"
    ) |> 
    as.data.frame() |> 
    tibble::rownames_to_column("test") |> 
    tidyr::pivot_longer(
      cols = !test,
      names_to = "modules",
      values_to = "odds.ratio"
    ) |> 
    mutate(
      tissue = .x,
      .before = 1
    )
)

pvalues <- purrr::map_dfr(
  sample.names,
  ~ trash[[.x]] |> 
    GeneOverlap::getMatrix(
      "pval"
    ) |> 
    as.data.frame() |> 
    tibble::rownames_to_column("test") |> 
    tidyr::pivot_longer(
      cols = !test,
      names_to = "modules",
      values_to = "pval"
    ) |> 
    mutate(
      tissue = .x,
      .before = 1
    )
)

rhy_genes <- purrr::map_dfr(
  sample.names,
  function (x) {
    algo <- names(l_rhy_genes[[x]])
    tibble(
      tissue = x,
      test = algo,
      n_rhy_genes = sapply(l_rhy_genes[[x]], length)
    )
  }
)

left_join(
  odds_ratio,
  pvalues,
  join_by(test, modules, tissue)
) |> 
  left_join(
    rhy_genes,
    join_by(tissue, test)
  ) |> 
  # clean data
  filter(
    n_rhy_genes > 40
  ) |> 
  # check significance
  mutate(
    sig = if_else(
      pval < 0.05,
      TRUE,
      FALSE
    )
  ) |> 
  # STEP ONE
  group_by(tissue, modules) |> 
  reframe(
    prop_sig = sum(sig) / n(),
    tot_algo = n()
  ) |> 
  # STEP TWO
  filter(
    tot_algo > 3 &
      prop_sig > 3/4
  ) |> 
  # TRANSFORM / VISUALIZE
  select(- tot_algo) |> 
  tidyr::pivot_wider(
    names_from = modules,
    values_from = prop_sig
  ) |> 
  mutate(
    across(
      !tissue,
      ~ if_else(
        is.na(.x), 
        "-", 
        paste0(
          round(.x, 2)*100,
          "%"
        )
      )
    )
  ) |> 
  select(
    tissue,
    any_of(
      paste0("C", 1:20)
    )
  ) |> 
  kableExtra::kable()

tissue	C6	C8	C13
panu-brain_stem	100%	83%	100%
panu-cerebellum	83%	-	-
panu-heart	-	-	83%
panu-hypothalamus	-	83%	-
panu-scn	-	-	83%

Module preservation

Prep data

Show code

# tidydata.db |> 
#   purrr::map_int(
#     ~ nrow(.x)
#   )

# We work with two sets:
nSets = 2;

# Find the common set of genes across all samples
common_genes <- tidydata.db |> 
  purrr::map(
    ~ .x |> 
      pull(gene_name) |> 
      unique()
  ) |> 
  purrr::reduce(
    intersect
  ) |> 
  intersect(
    mods[["module_genes"]]$gene_name
  ) 

# filter to keep only genes used in creating REF network
samples.list <- tidydata.db |> 
  purrr::map(
    ~ .x |> 
      filter(
        gene_name %in% common_genes
      )
  )

test_samples <- setdiff(sample.names, ref.sample)

# OPTION 1: Read previous run from memory
l_mp <- readRDS(
  here::here(
    glue::glue(
      "result/tissue-comparison/ref_{ref.sample}_module-preservation.rds"
    )
  )
)

# # OPTION 2: Run module preservation for each tissue pair (Takes time)
# l_mp <- purrr::map(
#   test_samples,
#   function (x) {
#     multiExpr <- prep_data_module_preservation(
#       data = samples.list,
#       ref = ref.sample,
#       test = x
#     )
# 
#     # CHECKS
#     #---#---#---#---#---#---#---#---#---#---#---#---
#     # Check that the data has the correct format for many functions
#     # operating on multiple sets:
#     exprSize = WGCNA::checkSets(multiExpr)
#     # Check that all genes and samples have sufficiently low numbers of
#     # missing values.
#     gsg = WGCNA::goodSamplesGenesMS(multiExpr, verbose = 1);
#     cat("All samples okay?\n", gsg$allOK, "\n")
#     #---#---#---#---#---#---#---#---#---#---#---#---
# 
#     # prep data
#     #---#---#---#---#---#---#---#---#---#---#---#---
#     # Expression data
#     multiExpr_1 = list(
#       ref = list(data = multiExpr[[1]]$data),
#       test = list(data = multiExpr[[2]]$data)
#     )
#     ## filter to keep only the genes that we are working with
#     mod.identity <- mods[["module_genes"]] |>
#       select(
#         gene_name,
#         old_labels
#       ) |>
#       filter(
#         gene_name %in% common_genes
#       ) |>
#       # !!this step is necessary!! #
#       arrange(gene_name)
#     ## specify the module identity of the genes
#     moduleColors <- mod.identity |>  pull(old_labels)
#     multiColor = list(ref = moduleColors)
# 
#     # Run module preservation
#     #---#---#---#---#---#---#---#---#---#---#---#---
#     mp = WGCNA::modulePreservation(
#       multiExpr_1,
#       multiColor,
#       referenceNetworks = 1,
#       nPermutations = 100,
#       calculateQvalue = FALSE,
#       quickCor = 0,
#       verbose = 1
#     )
# 
#     mp
#   }
# )
# 
# saveRDS(
#   l_mp,
#   here::here(
#     glue::glue(
#       "result/tissue-comparison/ref_{ref.sample}_module-preservation.rds"
#     )
#   )
# )

Plot results

Show code

l_zsummary <- purrr::map2(
  l_mp,
  test_samples,
  function (mp, y) {
    ref = 1
    test = 2
    statsObs = cbind(
      mp$quality$observed[[ref]][[test]], 
      mp$preservation$observed[[ref]][[test]]
    )
    statsZ = cbind(
      mp$quality$Z[[ref]][[test]][,-1], 
      mp$preservation$Z[[ref]][[test]][,-1]
    )
    
    # Compare preservation to quality:
    z.stats <- cbind(
      statsObs[, c("moduleSize", "medianRank.pres", "medianRank.qual")],
      signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)
    )
    
    mains = c(
      "Preservation Median rank", 
      "Preservation Zsummary"
    )
    pd <- position_dodge(0.5)
    
    # Plot
    out <- z.stats |> 
      tibble::rownames_to_column("old_labels") |> 
      left_join(
        distinct(mods[["module_genes"]][-1]), 
        by="old_labels"
      ) |> 
      select(
        module_name = module_identity,
        module_size = moduleSize,
        everything()
      ) |> 
      filter(
        !is.na(module_name)
      ) |>
      mutate(
        module_name = factor(
          module_name, 
          levels = unique(mods[["module_genes"]]$module_identity)
        )
      )
    
    ## PLOT PRESERVATION Z-SUMMARY
    p <- out |> 
      ggplot(aes(x=log10(module_size), y=Zsummary.pres)) +
      geom_hline(yintercept = c(2,10), col="darkred", alpha=0.5) +
      geom_point(
        alpha=0.5, 
        size=20, 
        col="lightgrey",
        position = pd
      ) +
      geom_text(
        aes(label=module_name),
        check_overlap = TRUE,
        size = 5
      ) +
      theme_bw(20) +
      scale_x_continuous(limits = c(0,max(log10(1000))+0.5),
                         breaks = c(0,1,2,3),
                         labels = c("0","10","100","1000")) +
      xlab("module size (genes)") +
      ylab(mains[2]) +
      ggtitle(paste0("Test: ", y))
    
    print(p)

    out
  }
) |> 
  setNames(
    test_samples
  )

Summary

Show code

purrr::map_dfr(
  test_samples,
  ~ l_zsummary[[.x]] |> 
    select(
      modules = module_name,
      zsumm = Zsummary.pres
    ) |> 
    mutate(
      # ref = which.sample,
      tissue = .x,
      .before = 1
    )
) |> 
  as_tibble() |> 
  filter(
    zsumm > 10
  ) |> 
  tidyr::pivot_wider(
    names_from = modules,
    values_from = zsumm
  ) |> 
  mutate(
    across(
      !tissue,
      ~ if_else(
        is.na(.x), 
        "-", 
        round(.x, 2) |> as.character()
      )
    )
  ) |> 
  select(
    tissue,
    any_of(
      paste0("C", 1:20)
    )
  ) |> 
  kableExtra::kable()

tissue	C1	C4	C6	C9	C10	C12
panu-cerebellum	18	19	28	31	18	-
panu-hypothalamus	-	15	15	-	11	11
panu-heart	17	11	-	30	-	-
panu-scn	-	20	18	27	11	11

Network statistics

From WGCNA-tutorial

Intramodular connectivity

“We begin by calculating the intramodular connectivity for each gene. (In network literature, connectivity is often referred to as ”degree”.) The function intramodularConnectivity computes:

the whole network connectivity kTotal,
the within (intra)module connectivity kWithin,
the extra-modular connectivity kOut=kTotal-kWithin, and
the difference of the intra- and extra-modular connectivities kDiff = kIn - kOut = 2*kIN-kTotal

Show code

# From what I can tell, colorh1 in the tutorial refers to moduleColors
colorh1 <- mods[["modules"]]$colors
adj_matrix <- mods[["adj_matrix"]]

# Calculate the connectivities of the genes
Alldegrees1 = WGCNA::intramodularConnectivity(
  adjMat = adj_matrix,
  colors = colorh1
) |>
  mutate(
    gene_name = rownames(adj_matrix),
    across(
      matches("^k"),
      ~ round(.x, 2)
    )
  )

Plotting the mean (± 95% CI) connectivity of genes in different modules

Show code

pd <- position_dodge(0.1)

# which_var <- "kTotal"
which_var <- c("kTotal", "kWithin", "kOut", "kDiff")

Alldegrees1 |>
  # rownames_to_column("gene_name") %>%
  left_join(
    mods[["module_genes"]],
    join_by(gene_name)
  ) |>
  # PLOT FROM RAW DATA
  mutate(
    module_identity = factor(
      module_identity,
      levels = paste0(
        "C",
        sort(
          unique(mods[["module_genes"]]$module_identity) |>
            stringr::str_replace("C", "") |>
            as.integer()
        )
      ) |> rev()
    )
  ) |>
  summarySE(
    measurevar = which_var,
    groupvars = "module_identity"
  ) |>
  mutate(
    type = factor(
      type,
      levels = which_var
    )
  ) |>
  # Plot
  ggplot(
    aes(
      y = module_identity,
      x = mean,
      group = interaction(module_identity, type)
    )
  ) +
  geom_vline(xintercept = 0, col = "maroon", alpha = 0.7, lwd =1.2) +
  labs(
    y = "",
    x = glue::glue(
      "Connectivity"
    ),
    title = ""
  ) +
  ## Add error bar here
  geom_errorbar(
    aes(xmin = mean-ci, xmax = mean+ci),
    width = 0.3,
    position=pd,
    lwd = 1.3,
    col="black",
    alpha = 1
  ) +
  # Add the points on top of the error bars
  geom_point(
    position = pd,
    size = 3,
    col = "black",
    fill = "orange",
    show.legend = F,
    pch = 21,
    alpha = 0.9
  ) +
  facet_wrap(
    ~ type,
    nrow = 2,
    scales = "free_x"
  ) +
  scale_x_continuous(
    n.breaks = 4
  ) +
  theme_bw(25) +
  # scale_color_manual(values=c("#F20505","#F5D736","#0FBF67")) +
  theme(
    legend.position = "none"
  )