Build gene co-expression network (GCN) from time-course gene expression data

Last updated on 2025-10-30.

Show code

library(dplyr)
library(dbplyr)
library(ggplot2)
for (i in list.files(here::here("R"), full.names = TRUE)) {
  source(i)
}

# SAMPLE NAME
## specify sample name
sample.names <- c(
  # dmel
  "dmel-head",
  "dmel-body",
  "dmel-heart",
  # mmus
  "mmus-brain_stem", 
  "mmus-cerebellum", 
  "mmus-hypothalamus", 
  "mmus-heart", 
  # panu
  "panu-brain_stem",
  "panu-cerebellum",
  "panu-hypothalamus",
  "panu-heart",
  "panu-scn"
)
# sample.cycles <- c("LD", "DD")

## SPECIFY THE DATASET TO BUILD GCN WITH
which.sample <- "panu-scn"

writeLines(
  glue::glue("Sample: {which.sample}")
)

Sample: panu-scn

Structure of input data:

ONE-SHOT (make_modules)

Make and plot modules

Show code

mods <- timecourseRnaseq::make_modules(
  data = data.db[[which.sample]],
  log2 = TRUE,
  id_column = "gene_name",
  min_expression = NULL,  # automatically estimated
  min_timepoints = NULL,  # automatically estimated
  method = "wgcna",
  qc = TRUE,
  sim_method = "kendall",
  soft_power = NULL,      # automatically estimated
  min_module_size = 50,
  max_modules = 16,
  merge_cutoff_similarity = 0.9,
  plot_network_min_edge = 0.5,
  plot_network = FALSE,
  tidy_modules = TRUE
  )

---------------------------------------------------
1. Log2-transform and subset 
---------------------------------------------------
Applying log2-transformation...Done.
Estimated min_expression = 3 
Estimated min_timepoints = 8 
Subsetting data...Done.
[ NOTE ]: After subsetting, 5186 of 15806 rows remain.

 Flagging genes and samples with too many missing values...
  ..step 1
All okay!Visualizing the log-transformed data
---------------------------------------------------
2. Calculate similarity of expression 
---------------------------------------------------
Running gene-gene similarity...Done.


---------------------------------------------------
3. Create adjacency matrix 
---------------------------------------------------
Performing network topology analysis to pick
  soft-thresholding power...
   Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
1      1   0.8300  2.660          0.980  2170.0    2210.0   2930
2      2   0.5320  0.790          0.908  1230.0    1240.0   2060
3      3   0.0279  0.103          0.716   804.0     790.0   1570
4      4   0.1700 -0.255          0.691   565.0     539.0   1260
5      5   0.4450 -0.460          0.777   418.0     384.0   1030
6      6   0.5980 -0.608          0.835   321.0     283.0    868
7      7   0.6760 -0.696          0.866   253.0     215.0    741
8      8   0.7370 -0.773          0.895   204.0     167.0    641
9      9   0.7770 -0.839          0.913   168.0     131.0    562
10    10   0.8130 -0.888          0.933   139.0     105.0    496
11    12   0.8460 -0.973          0.948   100.0      68.1    398
12    15   0.8700 -1.070          0.966    65.1      38.7    301
13    18   0.8770 -1.170          0.971    44.9      23.5    240
14    21   0.8710 -1.250          0.969    32.3      15.0    197

Plotting resutls from the network topology analysis...

Done.
[ NOTE, FIGURE ]: Red horizontal line indicates a signed R^2 of 0.9

Setting soft-thresholding power to: 15.
Power-transforming the gene-gene similarity matrix...Done.

---------------------------------------------------
4. Convert into topological overlap matrix (dissTOM) 
---------------------------------------------------
Creating dissTOM...Done.
Performing hierarchical clustering on dissTOM...Done.

---------------------------------------------------
5. Identify modules (clusters) 
---------------------------------------------------
Merging modules that have a correlation ≥ 0.9 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
          blue          brown           cyan      darkgreen       darkgrey 
           730            294            152            115            113 
   darkmagenta darkolivegreen     darkorange        darkred  darkturquoise 
            62             74            111            285            114 
        grey60      lightcyan     lightgreen    lightyellow        magenta 
           130            139            125            120            173 
        orange  paleturquoise           pink            red      royalblue 
           112             80            678            221            119 
   saddlebrown         salmon        skyblue      steelblue      turquoise 
            87            160            439             83            391 
        violet 
            79 
Merging modules that have a correlation ≥ 0.85 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
         brown           cyan      darkgreen       darkgrey    darkmagenta 
           294            152            234           1126             62 
darkolivegreen     darkorange        darkred  darkturquoise         grey60 
            74            111            285            114            130 
     lightcyan     lightgreen    lightyellow        magenta         orange 
           243            125            120            173            112 
 paleturquoise            red    saddlebrown      steelblue      turquoise 
            80           1111             87             83            470 
Merging modules that have a correlation ≥ 0.8 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.

Module (cluster) size:
mergedColors
         brown           cyan      darkgreen    darkmagenta darkolivegreen 
           294            235            234             62             74 
    darkorange  darkturquoise         grey60     lightgreen    lightyellow 
          1222            114            885            125           1489 
       magenta         orange  paleturquoise    saddlebrown 
           173            112             80             87 

Cutoff used: 0.8 
Number of modules identified: 14 

Calculating module-module similarity based
  on module-eigengene-expression...Done.
Tidying module names...Done.
Plotting adjacency matrix for module-module similarity...

---------------------------------------------------
6. Tidy modules (clusters) 
---------------------------------------------------

Show code

# "Returned object"
mods |> str(max.level = 1)

List of 4
 $ modules      :List of 2
 $ module_genes : tibble [5,186 × 3] (S3: tbl_df/tbl/data.frame)
 $ adj_matrix   : 'AsIs' num [1:5186, 1:5186] 1.00 6.14e-03 3.44e-09 1.79e-14 1.36e-07 ...
  ..- attr(*, "dimnames")=List of 2
 $ adj_matrix_ME:List of 2

Modify network plot

Internal function; use ::: to call

Show code

timecourseRnaseq:::plot_adj_as_network(
  matrix = mods[["adj_matrix_ME"]][["ME"]],
  # layout = igraph::layout.sugiyama,
  layout = igraph::layout_in_circle, # changed 
  min_edge = 0.6,
  node_label_size = 1.2,
  node_size = 20,
  edge_size = 3,
  node_frame_col = "grey20",
  node_fill_col = "grey80",
  vertex.frame.width = 3
)

Visualizing a simplified representation of the circadian GCN

Annotate the network

Obtain a list of genes in each module

Show code

module_genes <- mods[["module_genes"]]

Identify rhythmic modules

Show code

db_rhy <- load_rhy_genes(
  sample = which.sample
)
###-###-###-###-###-###-###-###-
# Set your p-value of choice
# col_pval = "BH.Q"
col_pval = "default.pvalue"
# col_pval = "raw.pvalue"
###-###-###-###-###-###-###-###-
l_module_genes <- module_genes |> 
  arrange(module_identity) |> 
  group_split(module_identity) |> 
  purrr::map(
    ~ .x |> pull(gene_name)
  ) |> 
  setNames(unique(module_genes[["module_identity"]]))

l_rhy_genes <- db_rhy |> 
  purrr::map(
    ~ .x |> 
      filter(
        if_all(
          all_of(col_pval),
          ~ .x < 0.05
        )
      ) |> 
      filter(
        ID %in% unlist(l_module_genes)
      ) |> 
      pull(1) |> 
      unique()
  ) |> 
  purrr::compact()

Comparison

Modules vs. Rhythmic genes

Show code

writeLines("#####################################################
How many genes are in each of my geneset of interest?
#####################################################")

#####################################################
How many genes are in each of my geneset of interest?
#####################################################

Show code

## MAKE YOUR LIST OF GENES OF INTEREST ##

# LIST ONE - WGCNA modules
list1 <- l_module_genes
writeLines("List of interesting genes #1
----------------------------
Genes in each of the identified gene-clusters or modules")

List of interesting genes #1
----------------------------
Genes in each of the identified gene-clusters or modules

Show code

sapply(list1, length)

  C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13  C14 
 114  125  112   74   62  173  235 1489   87  885   80  234  294 1222

Show code

## LIST TWO - rhythmic genes
list2 <- l_rhy_genes
writeLines("List of interesting genes #2
----------------------------
Rhythmic genes identified by different algorithms")

List of interesting genes #2
----------------------------
Rhythmic genes identified by different algorithms

Show code

sapply(list2, length)

      ARS    empJTK GeneCycle       JTK        LS    meta2d      RAIN 
      178       328       484       174         1       274       745

Show code

## CHECK FOR OVERLAP
# define size of genome
size = length(unique(c(unlist(list1), unlist(list2))))
# make a GOM object
gom.1v2 <- GeneOverlap::newGOM(
  list2, 
  list1,
  genome.size = size
)
# png(paste0(path_to_repo, "/results/figures/",
#            "02_pogo_GCN/",
#            sample.name[1],"_gom_1v2.png"),
#     width = 35, height = 15, units = "cm", res = 300)
GeneOverlap::drawHeatmap(
  gom.1v2,
  adj.p = TRUE,
  cutoff=0.05,
  what="odds.ratio",
  # what="Jaccard",
  log.scale = T,
  note.col = "black",
  grid.col = "Oranges"
)

Show code

# trash <- dev.off()

 # writeLines("How many genes exactly are overlapping between the pairwise comparisons")
# getMatrix(gom.1v4, name = "intersection") %>% t()

writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")

Visualizing the significant overlaps between your lists of interesting genes and the identified modules

Network statistics

From WGCNA-tutorial

Intramodular connectivity

“We begin by calculating the intramodular connectivity for each gene. (In network literature, connectivity is often referred to as”degree”.) The function intramodularConnectivity computes:

the whole network connectivity kTotal,
the within (intra)module connectivity kWithin,
the extra-modular connectivity kOut=kTotal-kWithin, and
the difference of the intra- and extra-modular connectivities kDiff = kIn - kOut = 2*kIN-kTotal

Show code

# From what I can tell, colorh1 in the tutorial refers to moduleColors
colorh1 <- mods[["modules"]]$colors
adj_matrix <- mods[["adj_matrix"]]

# Calculate the connectivities of the genes
Alldegrees1 = WGCNA::intramodularConnectivity(
  adjMat = adj_matrix, 
  colors = colorh1
) |> 
  mutate(
    gene_name = rownames(adj_matrix),
    across(
      matches("^k"),
      ~ round(.x, 2)
    )
  ) |> 
  glimpse()

Rows: 5,186
Columns: 5
$ kTotal    <dbl> 16.15, 39.72, 10.03, 65.04, 28.34, 14.51, 90.39, 27.68, 15.5…
$ kWithin   <dbl> 5.25, 37.48, 8.05, 59.96, 12.64, 12.34, 86.71, 26.15, 12.28,…
$ kOut      <dbl> 10.90, 2.24, 1.98, 5.09, 15.70, 2.16, 3.67, 1.53, 3.28, 3.41…
$ kDiff     <dbl> -5.65, 35.24, 6.07, 54.87, -3.06, 10.18, 83.04, 24.61, 9.00,…
$ gene_name <chr> "ENSPANG00000000040", "ENSPANG00000000041", "ENSPANG00000000…

Plotting the mean (± 95% CI) connectivity of genes in different modules

Show code

pd <- position_dodge(0.1)

# which_var <- "kTotal"
which_var <- c("kTotal", "kWithin", "kOut", "kDiff")

Alldegrees1 |> 
  # rownames_to_column("gene_name") %>% 
  left_join(
    module_genes, 
    join_by(gene_name)
  ) |> 
  glimpse() |> 
  # PLOT FROM RAW DATA
  mutate(
    module_identity = factor(
      module_identity, 
      levels = paste0(
        "C",
        sort(
          unique(module_genes$module_identity) |> 
            stringr::str_replace("C", "") |> 
            as.integer()
        )
      ) |> rev()
    )
  ) |> 
  summarySE(
    measurevar = which_var, 
    groupvars = "module_identity"
  ) |> 
  mutate(
    type = factor(
      type,
      levels = which_var
    )
  ) |> 
  # Plot
  ggplot(
    aes(
      y = module_identity, 
      x = mean,
      group = interaction(module_identity, type)
    )
  ) +
  geom_vline(xintercept = 0, col = "maroon", alpha = 0.7) +
  labs(
    y = "",
    x = glue::glue(
      "Connectivity"
    ),
    title = ""
  ) +
  ## Add error bar here
  geom_errorbar(
    aes(xmin = mean-ci, xmax = mean+ci),
    width = 0.3, 
    position=pd, 
    lwd = 1.3,
    col="black", 
    alpha = 1
  ) +
  # Add the points on top of the error bars
  geom_point(
    position = pd, 
    size = 3,
    col = "black", 
    fill = "orange",
    show.legend = F, 
    pch = 21,
    alpha = 0.9
  ) +
  facet_wrap(
    ~ type,
    nrow = 2,
    scales = "free_x"
  ) +
  scale_x_continuous(
    n.breaks = 4
  ) +
  theme_bw(25) +
  # scale_color_manual(values=c("#F20505","#F5D736","#0FBF67")) +
  theme(
    legend.position = "none"
  )

Rows: 5,186
Columns: 7
$ kTotal          <dbl> 16.15, 39.72, 10.03, 65.04, 28.34, 14.51, 90.39, 27.68…
$ kWithin         <dbl> 5.25, 37.48, 8.05, 59.96, 12.64, 12.34, 86.71, 26.15, …
$ kOut            <dbl> 10.90, 2.24, 1.98, 5.09, 15.70, 2.16, 3.67, 1.53, 3.28…
$ kDiff           <dbl> -5.65, 35.24, 6.07, 54.87, -3.06, 10.18, 83.04, 24.61,…
$ gene_name       <chr> "ENSPANG00000000040", "ENSPANG00000000041", "ENSPANG00…
$ module_identity <chr> "C10", "C5", "C3", "C3", "C7", "C5", "C3", "C5", "C5",…
$ old_labels      <chr> "lightgreen", "grey60", "lightyellow", "lightyellow", …