WGCNA on CTL04.

1. Environment Set Up

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, collapse = TRUE)

Values of RMarkdown parameters

for (i in 1:length(params))
  print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset  - Value: CTL04 - Class: character"
## [1] "Parameter: SE  - Value: ~/DataDir/bulkRNASeq/3.DataExploration/CTL04/Output/FemaleLineSE.rds - Class: character"
## [1] "Parameter: OutputFolder  - Value: ~/DataDir/bulkRNASeq/8.WGCNA/CTL04/1_NetworkGeneration/ - Class: character"
library(RNASeqBulkExploratory) 
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(WGCNA)
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
## 
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
## 
##     hclust
## 
## 
## Attaching package: 'WGCNA'
## The following object is masked from 'package:IRanges':
## 
##     cor
## The following object is masked from 'package:S4Vectors':
## 
##     cor
## The following object is masked from 'package:stats':
## 
##     cor
Dataset <- params$Dataset
OutputFolder <- params$OutputFolder

if (dir.exists(OutputFolder) == FALSE) {
 dir.create(OutputFolder, recursive=TRUE)
}

2. Data Upload

2.1 SE object

SE <- readRDS(params$SE)

2.2 Biotype selection and discard duplicated gene names

To avoid issues with downstream computations and not/unique rows, discard duplicated gene names and then set gene name as rowname.

SE_Bio <- RNASeqBulkExploratory::biotypeSelectSE(SE, lncRNA=TRUE, otherBios=NULL, showRemoved=TRUE)
## UPDATED library.size slot
## A total of 23485 rows out of 60237 were discarded, belonging to the following biotypes:
## 
##                          IG_C_gene                    IG_C_pseudogene 
##                                 14                                  9 
##                          IG_D_gene                          IG_J_gene 
##                                 30                                 18 
##                    IG_J_pseudogene                      IG_pseudogene 
##                                  3                                  1 
##                          IG_V_gene                    IG_V_pseudogene 
##                                143                                185 
##                              miRNA                           misc_RNA 
##                               1828                               2160 
##                            Mt_rRNA                            Mt_tRNA 
##                                  2                                 22 
##             polymorphic_pseudogene               processed_pseudogene 
##                                 49                              10136 
##                         pseudogene                           ribozyme 
##                                 18                                  8 
##                               rRNA                    rRNA_pseudogene 
##                                 26                                486 
##                             scaRNA                              scRNA 
##                                 48                                  1 
##                             snoRNA                              snRNA 
##                                925                               1836 
##                               sRNA                                TEC 
##                                  5                               1048 
##                          TR_C_gene                          TR_D_gene 
##                                  6                                  4 
##                          TR_J_gene                    TR_J_pseudogene 
##                                 78                                  4 
##                          TR_V_gene                    TR_V_pseudogene 
##                                106                                 33 
##   transcribed_processed_pseudogene     transcribed_unitary_pseudogene 
##                                498                                138 
## transcribed_unprocessed_pseudogene    translated_processed_pseudogene 
##                                937                                  2 
##  translated_unprocessed_pseudogene                 unitary_pseudogene 
##                                  1                                 97 
##             unprocessed_pseudogene                          vault_RNA 
##                               2579                                  1
## A total of 36752 rows out of 60237 were kept, belonging to the following biotypes:
## 
##         lncRNA protein_coding 
##          16852          19900

RemovedGenes <-  rowData(SE)[!(rownames(rowData(SE)) %in% rownames(rowData(SE_Bio))), ]

After the gene selection based on biotypes (protein-coding and lncRNAs), the dataset is structured in 36752 genes measured in 66 samples. 23485 have been discarded.

SE_Bio <- SE_Bio[!duplicated(rowData(SE_Bio)$GeneName), ]
rownames(SE_Bio) <- rowData(SE_Bio)$GeneName

2.3 Sample selection

All samples are kept for downstream analyses

SE object containing information about 36724 genes in 66 samples.

2.4 Sample Metadata and dummy variables

DummyTraits <- colData(SE_Bio)[,c(2, 5, 7, 8)]


# Treatment
for(level in unique(factor(DummyTraits$Condition))){
  DummyTraits[paste("Treat", level, sep = "_")] <- ifelse(DummyTraits$Condition == level, 1, 0)
}

# Pathway
for(level in unique(factor(DummyTraits$Pathway))){
  DummyTraits[paste("Path", level, sep = "_")] <- ifelse(DummyTraits$Pathway == level & DummyTraits$Type == 'Agonist', 1,
                                                         ifelse(DummyTraits$Pathway == level & DummyTraits$Type == 'Inhibitor', -1, NaN))
                                                         }

3. Generation of dds

CountMatrix <- assays(SE_Bio)$counts
SampleMeta <- DataFrame(colData(SE_Bio))

if(! all(rownames(SampleMeta) == colnames(CountMatrix))){
  stop('Inconsistency in count matrix and sample metadata')
}
dds <- DESeqDataSetFromMatrix(countData=assays(SE_Bio)$counts, SampleMeta, ~ Condition)
## converting counts to integer mode

# to add gene metadata
mcols(dds) <- DataFrame(mcols(dds), DataFrame(rowData(SE_Bio)))

dds
## class: DESeqDataSet 
## dim: 36724 66 
## metadata(1): version
## assays(1): counts
## rownames(36724): TSPAN6 TNMD ... AC114402.3 AC084756.2
## rowData names(9): Gene EnsGene ... Start End
## colnames(66): MTR_044_S1_CTL MTR_045_S2_CTL ... MTR_108_S63_DMSO
##   MTR_109_S64_DMSO
## colData names(20): InternalUniqueID HRID ... SeqPlatform lib.size

4. Gene filtering

Threshold of expression of at least 40 counts in at least 4 samples.

keep <- rowSums(counts(dds)>=40) >= 4

table(keep)
## keep
## FALSE  TRUE 
## 20222 16502
dds <- dds[keep,]

dds
## class: DESeqDataSet 
## dim: 16502 66 
## metadata(1): version
## assays(1): counts
## rownames(16502): TSPAN6 TNMD ... AC013477.2 AC005192.1
## rowData names(9): Gene EnsGene ... Start End
## colnames(66): MTR_044_S1_CTL MTR_045_S2_CTL ... MTR_108_S63_DMSO
##   MTR_109_S64_DMSO
## colData names(20): InternalUniqueID HRID ... SeqPlatform lib.size

A dds object containing information about 16502 genes in 66 samples is tested for differential expression.


5. Data wrangling

5.1 Size Factor calculation

dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
sizeFactors(dds)
##        MTR_044_S1_CTL        MTR_045_S2_CTL        MTR_046_S3_CTL 
##             0.9594736             0.8280515             0.7922835 
##        MTR_047_S4_CTL      MTR_048_S65_DMSO       MTR_049_S5_DMSO 
##             0.8583740             1.1528008             0.8910676 
##       MTR_050_S6_DMSO      MTR_051_S66_DMSO   MTR_052_S7_AhHyd_Ag 
##             0.8075375             0.9666246             0.9211348 
##   MTR_053_S8_AhHyd_Ag   MTR_054_S9_AhHyd_Ag  MTR_055_S10_AhHyd_Ag 
##             1.0109683             0.9280385             0.9611123 
## MTR_056_S11_AhHyd_Inh MTR_057_S12_AhHyd_Inh MTR_058_S13_AhHyd_Inh 
##             0.8905950             1.0722145             0.9403409 
## MTR_059_S14_AhHyd_Inh    MTR_060_S15_Ret_Ag    MTR_061_S16_Ret_Ag 
##             1.0290424             1.1683055             0.9678029 
##    MTR_062_S17_Ret_Ag    MTR_063_S18_Ret_Ag   MTR_064_S19_Ret_Inh 
##             0.8396898             0.9066152             0.9706658 
##   MTR_065_S20_Ret_Inh   MTR_066_S21_Ret_Inh   MTR_067_S22_Ret_Inh 
##             1.1804171             1.1526323             1.1409635 
##   MTR_068_S23_Andr_Ag   MTR_069_S24_Andr_Ag   MTR_070_S25_Andr_Ag 
##             0.8229332             1.0312696             0.7829875 
##   MTR_071_S26_Andr_Ag  MTR_072_S27_Andr_Inh  MTR_073_S28_Andr_Inh 
##             1.0114604             0.9013051             1.0058793 
##  MTR_074_S29_Andr_Inh  MTR_075_S30_Andr_Inh   MTR_076_S31_LivX_Ag 
##             0.9080588             0.8172921             1.0934397 
##   MTR_077_S32_LivX_Ag   MTR_078_S33_LivX_Ag   MTR_079_S34_LivX_Ag 
##             1.5333475             1.0464944             1.2086393 
##  MTR_080_S35_LivX_Inh  MTR_081_S36_LivX_Inh  MTR_082_S37_LivX_Inh 
##             0.9383073             1.1471020             0.8598941 
##  MTR_083_S38_LivX_Inh     MTR_084_S39_GC_Ag     MTR_085_S40_GC_Ag 
##             0.8108603             1.3670248             1.0766677 
##     MTR_086_S41_GC_Ag     MTR_087_S42_GC_Ag    MTR_088_S43_GC_Inh 
##             1.1275554             1.2528067             0.9787086 
##    MTR_089_S44_GC_Inh    MTR_090_S45_GC_Inh    MTR_091_S46_GC_Inh 
##             1.0665263             1.2069962             1.1864471 
##   MTR_092_S47_Estr_Ag   MTR_093_S48_Estr_Ag   MTR_094_S49_Estr_Ag 
##             0.9114213             0.9750739             1.0407093 
##   MTR_095_S50_Estr_Ag  MTR_096_S51_Estr_Inh  MTR_097_S52_Estr_Inh 
##             0.9060048             0.8941877             0.9644566 
##  MTR_098_S53_Estr_Inh  MTR_099_S54_Estr_Inh   MTR_100_S55_Thyr_Ag 
##             0.8670195             1.1577416             1.2174494 
##   MTR_101_S56_Thyr_Ag   MTR_102_S57_Thyr_Ag   MTR_103_S58_Thyr_Ag 
##             1.2011112             1.2874366             0.8942726 
##  MTR_104_S59_Thyr_Inh  MTR_105_S60_Thyr_Inh  MTR_106_S61_Thyr_Inh 
##             0.8839412             0.9592396             0.9380859 
##  MTR_107_S62_Thyr_Inh      MTR_108_S63_DMSO      MTR_109_S64_DMSO 
##             1.0306940             1.2165280             1.0782776

5.2 Vst Transformation

VST <- assay(vst(dds, blind=TRUE))
boxplot(VST, use.cols = TRUE, col='palegreen')

5.3 Selecting on coefficient of variation

Creation of “Pathway” column.

CV <- apply(VST, 1, function(x) sd(x)/abs(mean(x)))
quantile(CV, probs=seq(0,1,0.1))
##          0%         10%         20%         30%         40%         50% 
## 0.003982777 0.011105953 0.013533833 0.015686049 0.017764815 0.020199697 
##         60%         70%         80%         90%        100% 
## 0.023090772 0.027209064 0.033531228 0.046387012 0.297307228
CVTh <- 0.35
hist(CV, col='palegreen', breaks=30)
abline(v=quantile(CV, probs=seq(0.5,1,0.1)), col='grey', lwd=1.5)
abline(v=quantile(CV, probs=CVTh), col='red', lwd=1.5)

select <- CV > quantile(CV, CVTh)
VSTSel <- VST[select, ]
dim(VSTSel)
## [1] 10726    66

5.4 Matrix Transposition

VstSel <- t(VSTSel)
dim(VstSel)  
## [1]    66 10726

5.5 SE object for downstream analyses

The SE object is used in downstream analyses for visualization purposed (e.g. module heatmaps).

dds_WGCNA <- dds[select,]
SE_WGCNA <- as(dds_WGCNA, "RangedSummarizedExperiment")
assays(SE_WGCNA)$vst <- assay(vst(dds_WGCNA, blind=TRUE))

6. Data Inspection

6.1 Check for missing values

gsg = WGCNA::goodSamplesGenes(VstSel, verbose=3)
##  Flagging genes and samples with too many missing values...
##   ..step 1
gsg$allOK 
## [1] TRUE

6.2 Check for outlier samples

sampleTree = hclust(dist(VstSel), method ="average")

par(cex = 0.6)
par(mar =c(0,4,2,0))
plot(sampleTree, main ="Sample clustering to detect outliers", sub ="", xlab ="", cex.lab = 1.5, cex.axis=1.5, cex.main =2)

6.3 Sample metadata visualization

sampleTree2 <- hclust(dist(VstSel), method='average')
traitColors <- numbers2colors(DummyTraits[c(5:20)], signed=TRUE)
plotDendroAndColors(sampleTree2, traitColors, groupLabels=names(DummyTraits[c(5:20)]), main = 'Sample dendrogram and trait heatmap')


7. Network construction

7.1 Define soft threshold

powers <- c(c(1:10), seq(from=12, to=20, by=2))

RPowerTable2 <- pickSoftThreshold(VstSel, powerVector=powers, verbose=F, networkType="signed", corFnc='bicor', corOptions=list(maxPOutliers = 0.05)) [[2]]
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1 1.84e-01 13.800          0.817  5390.0    5390.0   5700
## 2      2 1.93e-02  1.200          0.822  3060.0    3050.0   3530
## 3      3 5.65e-03  0.336          0.950  1900.0    1870.0   2500
## 4      4 8.61e-05 -0.026          0.980  1250.0    1230.0   1910
## 5      5 2.67e-02 -0.337          0.985   869.0     848.0   1530
## 6      6 1.33e-01 -0.640          0.976   627.0     605.0   1270
## 7      7 2.63e-01 -0.827          0.976   467.0     444.0   1080
## 8      8 4.05e-01 -1.010          0.976   357.0     333.0    934
## 9      9 5.36e-01 -1.160          0.983   279.0     253.0    820
## 10    10 6.16e-01 -1.290          0.974   222.0     196.0    728
## 11    12 7.29e-01 -1.470          0.977   146.0     121.0    587
## 12    14 7.91e-01 -1.580          0.976   102.0      78.1    485
## 13    16 8.28e-01 -1.650          0.979    73.2      52.1    408
## 14    18 8.47e-01 -1.710          0.977    54.3      35.4    347
## 15    20 8.73e-01 -1.720          0.983    41.3      24.8    299
par(mfrow=c(1,2))
cex1 = 0.9
# Scale-free topology fit index as a function of the soft-thresholding power
plot(RPowerTable2[,1], -sign(RPowerTable2[,3])*RPowerTable2[,2], xlab = 'Soft Threshold (power)', ylab = 'Scale Free Topology Model Fit, signed R^2', type = 'n', main = paste('Scale independence')) 
text(RPowerTable2[,1], -sign(RPowerTable2[,3])*RPowerTable2[,2], labels=powers, cex = cex1, col='red')
abline(h=0.8, col='red')  # The line corresponds to using an R^2 cut-off of h
plot(RPowerTable2[,1], RPowerTable2[,5], xlab = 'Soft Threshold (power)', ylab = 'Mean Connectivity', type = 'n', main = paste('Mean connectivity')) 
text(RPowerTable2[,1], RPowerTable2[,5], labels=powers, cex = cex1, col='red')

softPowerBi <- 14

7.2 Topological and Overlap matrices

adjacency <- adjacency(VstSel, power=softPowerBi, type="signed", corFnc='bicor', corOptions=list(maxPOutliers = 0.05))
TOM <- TOMsimilarity(adjacency, TOMType= "signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.

dissTOM <- 1-TOM

8. Gene Tree and Module identification

8.1 Gene Tree

GeneTree <- hclust(as.dist(dissTOM), method = 'average')

plot(GeneTree, xlab='', sub='', main='Gene Clustering on TOM-based dissimilarity', labels=FALSE, hang=0.04)

8.2 Module identification: tuning parameters

DynamicColorsS1 <- labels2colors(cutreeDynamic(dendro=GeneTree, distM=dissTOM, deepSplit=1, pamRespectsDendro=TRUE, minClusterSize=50, pamStage=TRUE, cutHeight=0.999, maxCoreScatter=0.6, maxPamDist=0.98))
##  ..done.

DynamicColorsS2 <- labels2colors(cutreeDynamic(dendro=GeneTree, distM=dissTOM, deepSplit=1, pamRespectsDendro=TRUE, minClusterSize=40, pamStage=TRUE, cutHeight=0.999, maxCoreScatter=0.6, maxPamDist=0.98))
##  ..done.

DynamicColorsS3 <- labels2colors(cutreeDynamic(dendro=GeneTree, distM=dissTOM, deepSplit=1, pamRespectsDendro=TRUE, minClusterSize=60, pamStage=TRUE, cutHeight=0.9999, maxCoreScatter=0.6, maxPamDist=0.98))
##  ..done.

DynamicColorsS4 <- labels2colors(cutreeDynamic(dendro=GeneTree, distM=dissTOM, deepSplit=2, pamRespectsDendro=TRUE, minClusterSize=60, pamStage=TRUE, cutHeight=0.9999, maxCoreScatter=0.6, maxPamDist=0.98))
##  ..done.


plotDendroAndColors(GeneTree, cbind(DynamicColorsS1, DynamicColorsS2,DynamicColorsS3, DynamicColorsS4), c('S1','S2', 'S3', 'S4'), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = 'Gene dendrogram and module colors')

8.3 Choose parameters

DynamicModules <- cutreeDynamic(dendro=GeneTree, distM=dissTOM, deepSplit=1, pamRespectsDendro=TRUE, minClusterSize=60, pamStage=TRUE, cutHeight=0.9999, maxCoreScatter=0.6, maxPamDist=0.98)
##  ..done.

table(DynamicModules)
## DynamicModules
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
##  404 1640 1308  871  764  735  668  533  498  470  413  353  348  314  312  278 
##   16   17   18   19   20   21 
##  171  166  160  117  109   94

DynamicColors <- labels2colors(DynamicModules)
table(DynamicColors)
## DynamicColors
##        black         blue        brown         cyan      darkred        green 
##          533         1308          871          312           94          735 
##  greenyellow         grey       grey60    lightcyan   lightgreen  lightyellow 
##          353          404          166          171          160          117 
##      magenta midnightblue         pink       purple          red    royalblue 
##          470          278          498          413          668          109 
##       salmon          tan    turquoise       yellow 
##          314          348         1640          764

plotDendroAndColors(GeneTree, DynamicColors, 'Dynamic Tree Cut', dendroLabels=FALSE, hang=0.03, addGuide=TRUE, guideHang=0.05, main='Gene dendrogram and module colors')


9. Module merging

9.1 Calculate module eigengenes (ME)

Define numbers of genes and samples; then calculate Module Eigengenes and order them. A column order is applied, so that modules that are more similar are nearby.

MEs <- orderMEs(moduleEigengenes(VstSel, DynamicColors)$eigengenes)
head(MEs)
##                      MEbrown       MEtan MEmidnightblue    MEsalmon
## MTR_044_S1_CTL    0.01164482 -0.07122255    -0.01021068 -0.07573373
## MTR_045_S2_CTL    0.01740926 -0.09708247     0.03969511 -0.05890540
## MTR_046_S3_CTL    0.05065973 -0.05104592     0.02567098 -0.06944093
## MTR_047_S4_CTL    0.03114079 -0.09898086     0.01567783 -0.07417604
## MTR_048_S65_DMSO -0.01230385  0.02769178    -0.01430991 -0.04171881
## MTR_049_S5_DMSO   0.02713075  0.04848132    -0.07565658 -0.08349283
##                  MEgreenyellow    MEgrey60 MEturquoise    MEyellow      MEblack
## MTR_044_S1_CTL    -0.012960723 -0.03890884  0.08525720  0.01789722  0.023118038
## MTR_045_S2_CTL     0.029687604 -0.02855914  0.14620583  0.07998293 -0.023898247
## MTR_046_S3_CTL     0.037021737 -0.03068161  0.14099085  0.07521426 -0.011804424
## MTR_047_S4_CTL    -0.001706034 -0.04178070  0.12284714  0.05202469  0.008428860
## MTR_048_S65_DMSO   0.026072801 -0.03038528 -0.04238584 -0.01854488 -0.009501556
## MTR_049_S5_DMSO    0.004141788 -0.03489549 -0.05866922 -0.05931575  0.050182954
##                      MEgreen  MElightcyan   MEmagenta      MEblue     MEpink
## MTR_044_S1_CTL    0.02212816 -0.002625761 -0.03310224 -0.06946589 0.07460291
## MTR_045_S2_CTL   -0.01754827 -0.048625768 -0.08932301 -0.08753879 0.02792890
## MTR_046_S3_CTL   -0.03934939 -0.055179656 -0.05593879 -0.06969927 0.05076623
## MTR_047_S4_CTL   -0.01101092 -0.037642204 -0.07900949 -0.06469831 0.06968595
## MTR_048_S65_DMSO  0.04155309  0.050186067  0.07799477  0.02209544 0.01031534
## MTR_049_S5_DMSO   0.04106504  0.058151932  0.09199465  0.05833541 0.07591235
##                  MElightgreen MElightyellow   MEdarkred    MEpurple MEroyalblue
## MTR_044_S1_CTL     0.13439642    0.09126141 -0.03825282 -0.03756012  0.06297084
## MTR_045_S2_CTL     0.18913089    0.11185598 -0.01249125 -0.04916017  0.04847349
## MTR_046_S3_CTL     0.17748768    0.06980490 -0.04132819 -0.08690463  0.02578843
## MTR_047_S4_CTL     0.18990036    0.11492480 -0.03832638 -0.07039048  0.04782099
## MTR_048_S65_DMSO  -0.01198280   -0.04549458  0.02896907  0.04838819 -0.02783282
## MTR_049_S5_DMSO   -0.06366547   -0.04062887 -0.06841273 -0.03555880 -0.04673892
##                       MEcyan       MEred       MEgrey
## MTR_044_S1_CTL   -0.07436550 -0.04929607  0.114158921
## MTR_045_S2_CTL   -0.10506019 -0.06176492  0.113102440
## MTR_046_S3_CTL   -0.14063590 -0.07038433  0.157195871
## MTR_047_S4_CTL   -0.09196698 -0.04739005  0.162845528
## MTR_048_S65_DMSO -0.02944146 -0.07247749 -0.098689084
## MTR_049_S5_DMSO  -0.03558426 -0.05386847  0.009133643

9.2 ME Clustering

METree <- hclust(as.dist(1-cor(MEs)), method = 'average')
plot(METree, main = 'Clustering of module eigengenes')
abline(h=0.15, col = 'red')

9.3 Merge modules

MEDissThres <- 0.15
# Plot the cut line into the dendrogram
# Call an automatic merging function
merge <- mergeCloseModules(VstSel, DynamicColors, cutHeight = MEDissThres, verbose = 5, iterate = FALSE)
##  mergeCloseModules: Merging modules whose distance is less than 0.15
##    .. will look for grey label MEgrey
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes : Working on ME for module black
##       ... 533 genes
##      moduleEigengenes : Working on ME for module blue
##       ... 1308 genes
##      moduleEigengenes : Working on ME for module brown
##       ... 871 genes
##      moduleEigengenes : Working on ME for module cyan
##       ... 312 genes
##      moduleEigengenes : Working on ME for module darkred
##       ... 94 genes
##      moduleEigengenes : Working on ME for module green
##       ... 735 genes
##      moduleEigengenes : Working on ME for module greenyellow
##       ... 353 genes
##      moduleEigengenes : Working on ME for module grey60
##       ... 166 genes
##      moduleEigengenes : Working on ME for module lightcyan
##       ... 171 genes
##      moduleEigengenes : Working on ME for module lightgreen
##       ... 160 genes
##      moduleEigengenes : Working on ME for module lightyellow
##       ... 117 genes
##      moduleEigengenes : Working on ME for module magenta
##       ... 470 genes
##      moduleEigengenes : Working on ME for module midnightblue
##       ... 278 genes
##      moduleEigengenes : Working on ME for module pink
##       ... 498 genes
##      moduleEigengenes : Working on ME for module purple
##       ... 413 genes
##      moduleEigengenes : Working on ME for module red
##       ... 668 genes
##      moduleEigengenes : Working on ME for module royalblue
##       ... 109 genes
##      moduleEigengenes : Working on ME for module salmon
##       ... 314 genes
##      moduleEigengenes : Working on ME for module tan
##       ... 348 genes
##      moduleEigengenes : Working on ME for module turquoise
##       ... 1640 genes
##      moduleEigengenes : Working on ME for module yellow
##       ... 764 genes
##    Merging original colors midnightblue, salmon
##    Merging original colors turquoise, yellow
##    Merging original colors green, lightcyan
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes : Working on ME for module black
##       ... 533 genes
##      moduleEigengenes : Working on ME for module blue
##       ... 1308 genes
##      moduleEigengenes : Working on ME for module brown
##       ... 871 genes
##      moduleEigengenes : Working on ME for module cyan
##       ... 312 genes
##      moduleEigengenes : Working on ME for module darkred
##       ... 94 genes
##      moduleEigengenes : Working on ME for module green
##       ... 906 genes
##      moduleEigengenes : Working on ME for module greenyellow
##       ... 353 genes
##      moduleEigengenes : Working on ME for module grey
##       ... 404 genes
##      moduleEigengenes : Working on ME for module grey60
##       ... 166 genes
##      moduleEigengenes : Working on ME for module lightgreen
##       ... 160 genes
##      moduleEigengenes : Working on ME for module lightyellow
##       ... 117 genes
##      moduleEigengenes : Working on ME for module magenta
##       ... 470 genes
##      moduleEigengenes : Working on ME for module midnightblue
##       ... 592 genes
##      moduleEigengenes : Working on ME for module pink
##       ... 498 genes
##      moduleEigengenes : Working on ME for module purple
##       ... 413 genes
##      moduleEigengenes : Working on ME for module red
##       ... 668 genes
##      moduleEigengenes : Working on ME for module royalblue
##       ... 109 genes
##      moduleEigengenes : Working on ME for module tan
##       ... 348 genes
##      moduleEigengenes : Working on ME for module turquoise
##       ... 2404 genes
# The merged module colors
FinalColors <- merge$colors
# Eigengenes of the new merged
FinalMEs = merge$newMEs
plotDendroAndColors(GeneTree, cbind(DynamicColors, FinalColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

ModuleAssignment <- data.frame(Gene=colnames(VstSel), Module=FinalColors)
table(ModuleAssignment$Module)
## 
##        black         blue        brown         cyan      darkred        green 
##          533         1308          871          312           94          906 
##  greenyellow         grey       grey60   lightgreen  lightyellow      magenta 
##          353          404          166          160          117          470 
## midnightblue         pink       purple          red    royalblue          tan 
##          592          498          413          668          109          348 
##    turquoise 
##         2404

10. Savings

save(adjacency, dissTOM, DummyTraits, FinalMEs, VstSel, ModuleAssignment, SE_WGCNA, file=paste0(OutputFolder, 'NetworkGeneration.RData'))
SessionInfo <- sessionInfo()
Date <- date()
save.image(paste0(OutputFolder, '/', 'NetworkGenerationWorkspace.RData'))
Date
## [1] "Mon Jul 21 19:29:05 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] WGCNA_1.72-1                fastcluster_1.2.3          
##  [3] dynamicTreeCut_1.63-1       DESeq2_1.38.3              
##  [5] SummarizedExperiment_1.28.0 Biobase_2.58.0             
##  [7] MatrixGenerics_1.10.0       matrixStats_0.63.0         
##  [9] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [11] IRanges_2.32.0              S4Vectors_0.36.1           
## [13] BiocGenerics_0.44.0         RNASeqBulkExploratory_0.2.1
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7           bit64_4.0.5            doParallel_1.0.17     
##  [4] RColorBrewer_1.1-3     httr_1.4.5             backports_1.4.1       
##  [7] tools_4.2.1            bslib_0.4.2            utf8_1.2.3            
## [10] R6_2.5.1               DT_0.27                rpart_4.1.19          
## [13] Hmisc_4.8-0            DBI_1.1.3              colorspace_2.1-0      
## [16] nnet_7.3-18            tidyselect_1.2.0       gridExtra_2.3         
## [19] preprocessCore_1.60.2  bit_4.0.5              compiler_4.2.1        
## [22] cli_3.6.1              htmlTable_2.4.1        DelayedArray_0.24.0   
## [25] sass_0.4.5             checkmate_2.1.0        scales_1.2.1          
## [28] stringr_1.5.0          digest_0.6.31          foreign_0.8-84        
## [31] rmarkdown_2.20         XVector_0.38.0         base64enc_0.1-3       
## [34] jpeg_0.1-10            pkgconfig_2.0.3        htmltools_0.5.4       
## [37] highr_0.10             fastmap_1.1.1          htmlwidgets_1.6.1     
## [40] rlang_1.1.1            impute_1.72.3          rstudioapi_0.14       
## [43] RSQLite_2.3.0          jquerylib_0.1.4        generics_0.1.3        
## [46] jsonlite_1.8.4         BiocParallel_1.32.5    dplyr_1.1.0           
## [49] RCurl_1.98-1.10        magrittr_2.0.3         GO.db_3.16.0          
## [52] GenomeInfoDbData_1.2.9 Formula_1.2-5          interp_1.1-3          
## [55] Matrix_1.5-3           Rcpp_1.0.10            munsell_0.5.0         
## [58] fansi_1.0.4            lifecycle_1.0.3        stringi_1.7.12        
## [61] yaml_2.3.7             zlibbioc_1.44.0        grid_4.2.1            
## [64] blob_1.2.3             parallel_4.2.1         crayon_1.5.2          
## [67] deldir_1.0-6           lattice_0.20-45        Biostrings_2.66.0     
## [70] splines_4.2.1          annotate_1.76.0        KEGGREST_1.38.0       
## [73] locfit_1.5-9.7         knitr_1.42             pillar_1.8.1          
## [76] geneplotter_1.76.0     codetools_0.2-19       XML_3.99-0.13         
## [79] glue_1.6.2             evaluate_0.20          latticeExtra_0.6-30   
## [82] data.table_1.14.8      png_0.1-8              vctrs_0.6.2           
## [85] foreach_1.5.2          gtable_0.3.1           cachem_1.0.7          
## [88] ggplot2_3.4.1          xfun_0.37              xtable_1.8-4          
## [91] survival_3.5-3         tibble_3.2.1           iterators_1.0.14      
## [94] AnnotationDbi_1.60.0   memoise_2.0.1          cluster_2.1.4