Differential expression analysis on CTL04.

1. Environment Set Up

Values of RMarkdown parameters

for (i in 1:length(params))
  print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset  - Value: CTL04 - Class: character"
## [1] "Parameter: SE  - Value: ~/DataDir/bulkRNASeq/3.DataExploration/CTL04/Output/FemaleLineSE_Bio.rds - Class: character"
## [1] "Parameter: OutputFolder  - Value: ~/DataDir/bulkRNASeq/5.DifferentialExpression/CTL04/ - Class: character"
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(SummarizedExperiment)
library(ggplot2)
library(sechm)
library(ashr)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
Dataset <- params$Dataset
OutputFolder <- params$OutputFolder

if (dir.exists(OutputFolder) == FALSE) {
 dir.create(OutputFolder, recursive=TRUE)
}

2. Data Upload

2.1 SE object

SE object created from exploratory analysis. Steps already performed: gene selection according to biotype.

SE_Bio <- readRDS(params$SE)

2.2 Discard duplicated gene names

SE_Bio <- SE_Bio[!duplicated(rowData(SE_Bio)$GeneName), ]
rownames(SE_Bio) <- rowData(SE_Bio)$GeneName

2.3 Sample selection

Sample selection was not necessary.

SE object containing information about 19892 genes in 66 samples.


3. Generation of dds

CountMatrix <- assays(SE_Bio)$counts
SampleMeta <- DataFrame(colData(SE_Bio))

if(! all(rownames(SampleMeta) == colnames(CountMatrix))){
  stop('Inconsistency in count matrix and sample metadata')
}
dds <- DESeqDataSetFromMatrix(countData=assays(SE_Bio)$counts, SampleMeta, ~ Condition)
## converting counts to integer mode

# to add gene metadata
mcols(dds) <- DataFrame(mcols(dds), DataFrame(rowData(SE_Bio)))

dds
## class: DESeqDataSet 
## dim: 19892 66 
## metadata(1): version
## assays(1): counts
## rownames(19892): TSPAN6 TNMD ... AC114402.3 AC084756.2
## rowData names(9): Gene EnsGene ... Start End
## colnames(66): MTR_044_S1_CTL MTR_045_S2_CTL ... MTR_108_S63_DMSO
##   MTR_109_S64_DMSO
## colData names(20): InternalUniqueID HRID ... SeqPlatform lib.size

4. Gene filtering

4.1 Keep only protein-coding genes

keep <- mcols(dds)$GeneBiotype=="protein_coding"
table(keep)
## keep
##  TRUE 
## 19892
dds <- dds[keep,]

4.2 Discard very lowly expressed genes

keep <- rowSums(counts(dds)>=30) >= 6

table(keep)
## keep
## FALSE  TRUE 
##  5587 14305
dds <- dds[keep,]

dds
## class: DESeqDataSet 
## dim: 14305 66 
## metadata(1): version
## assays(1): counts
## rownames(14305): TSPAN6 TNMD ... AC013477.2 AC005192.1
## rowData names(9): Gene EnsGene ... Start End
## colnames(66): MTR_044_S1_CTL MTR_045_S2_CTL ... MTR_108_S63_DMSO
##   MTR_109_S64_DMSO
## colData names(20): InternalUniqueID HRID ... SeqPlatform lib.size

A dds object containing information about 14305 genes in 66 samples is tested for differential expression.


5. Standard DEA

5.1 Size Factors

dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
sizeFactors(dds)
##        MTR_044_S1_CTL        MTR_045_S2_CTL        MTR_046_S3_CTL 
##             0.9674738             0.8350053             0.7994774 
##        MTR_047_S4_CTL      MTR_048_S65_DMSO       MTR_049_S5_DMSO 
##             0.8647039             1.1545811             0.8927725 
##       MTR_050_S6_DMSO      MTR_051_S66_DMSO   MTR_052_S7_AhHyd_Ag 
##             0.8062155             0.9678828             0.9198482 
##   MTR_053_S8_AhHyd_Ag   MTR_054_S9_AhHyd_Ag  MTR_055_S10_AhHyd_Ag 
##             1.0113918             0.9267874             0.9605147 
## MTR_056_S11_AhHyd_Inh MTR_057_S12_AhHyd_Inh MTR_058_S13_AhHyd_Inh 
##             0.8931790             1.0721826             0.9423563 
## MTR_059_S14_AhHyd_Inh    MTR_060_S15_Ret_Ag    MTR_061_S16_Ret_Ag 
##             1.0321320             1.1619898             0.9653531 
##    MTR_062_S17_Ret_Ag    MTR_063_S18_Ret_Ag   MTR_064_S19_Ret_Inh 
##             0.8371725             0.9028409             0.9680725 
##   MTR_065_S20_Ret_Inh   MTR_066_S21_Ret_Inh   MTR_067_S22_Ret_Inh 
##             1.1748271             1.1443310             1.1357192 
##   MTR_068_S23_Andr_Ag   MTR_069_S24_Andr_Ag   MTR_070_S25_Andr_Ag 
##             0.8208237             1.0292281             0.7800034 
##   MTR_071_S26_Andr_Ag  MTR_072_S27_Andr_Inh  MTR_073_S28_Andr_Inh 
##             1.0080208             0.8955569             1.0022834 
##  MTR_074_S29_Andr_Inh  MTR_075_S30_Andr_Inh   MTR_076_S31_LivX_Ag 
##             0.9102741             0.8174174             1.0912738 
##   MTR_077_S32_LivX_Ag   MTR_078_S33_LivX_Ag   MTR_079_S34_LivX_Ag 
##             1.5235693             1.0443965             1.2029858 
##  MTR_080_S35_LivX_Inh  MTR_081_S36_LivX_Inh  MTR_082_S37_LivX_Inh 
##             0.9385258             1.1453717             0.8640111 
##  MTR_083_S38_LivX_Inh     MTR_084_S39_GC_Ag     MTR_085_S40_GC_Ag 
##             0.8111974             1.3632485             1.0751860 
##     MTR_086_S41_GC_Ag     MTR_087_S42_GC_Ag    MTR_088_S43_GC_Inh 
##             1.1246536             1.2487964             0.9790384 
##    MTR_089_S44_GC_Inh    MTR_090_S45_GC_Inh    MTR_091_S46_GC_Inh 
##             1.0710966             1.2139912             1.1863333 
##   MTR_092_S47_Estr_Ag   MTR_093_S48_Estr_Ag   MTR_094_S49_Estr_Ag 
##             0.9102834             0.9704992             1.0409129 
##   MTR_095_S50_Estr_Ag  MTR_096_S51_Estr_Inh  MTR_097_S52_Estr_Inh 
##             0.9073002             0.8912654             0.9623986 
##  MTR_098_S53_Estr_Inh  MTR_099_S54_Estr_Inh   MTR_100_S55_Thyr_Ag 
##             0.8698593             1.1558152             1.2083761 
##   MTR_101_S56_Thyr_Ag   MTR_102_S57_Thyr_Ag   MTR_103_S58_Thyr_Ag 
##             1.1990743             1.2799726             0.8887268 
##  MTR_104_S59_Thyr_Inh  MTR_105_S60_Thyr_Inh  MTR_106_S61_Thyr_Inh 
##             0.8873289             0.9631042             0.9381161 
##  MTR_107_S62_Thyr_Inh      MTR_108_S63_DMSO      MTR_109_S64_DMSO 
##             1.0372043             1.2184272             1.0777296
SF <-data.frame(Sample=names(sizeFactors(dds)), SizeF=sizeFactors(dds))

ggplot(SF, aes(Sample, SizeF)) +
  geom_point() + guides(x =  guide_axis(angle = 90)) +
  ylim(0, 1.5)

5.2 Vst Transformation

SE_DEA <- as(dds, "RangedSummarizedExperiment")
assays(SE_DEA)$vst <- assay(vst(dds, blind=TRUE))
colData(SE_DEA)$Type <- factor(colData(SE_DEA)$Type, levels=c("CTL", "DMSO", "Agonist", "Inhibitor"))
colData(SE_DEA)$Pathway <- factor(colData(SE_DEA)$Pathway, levels=c("None", "AhHyd", "Andr", "Estr", "GC", "LivX", "Ret", "Thyr"))

metadata(SE_DEA)$anno_colors <- list(Pathway = c(AhHyd = "#C8C3A7", Andr = "#DBD294", Estr = "#E07A5F", GC = "#3D405B", LivX = "#81B29A", None = "grey", Ret = "#F2CC8F", Thyr = "#E59B24"), Type = c(DMSO='grey35', CTL= 'grey20',Agonist="#98A534", Inhibitor="#3E395C"), Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3', 
                 'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
                 'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',  
                 "Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050', 
                 'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
                 'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50', 
                 'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50', 
                 'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
                 ))

5.3 Create List to store results

Treat <- levels(colData(SE_DEA)$Pathway)[-1]

Treat
## [1] "AhHyd" "Andr"  "Estr"  "GC"    "LivX"  "Ret"   "Thyr"
DEA_Res <- list()

for(i in 1:length(Treat)){
  DEA_Res[[i]] <- list()}

names(DEA_Res) <- Treat

6. Thyroid treatment

6.1 Thyroid Agonist vs DMSO

Path <- Treat[7]
Path
## [1] "Thyr"

6.1.1 DEA

res <- results(dds, contrast=c("Condition","Thyr_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90, independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3788, 26%
## LFC < 0 (down)     : 3726, 26%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

6.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Thyr_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3789, 26%
## LFC < 0 (down)     : 3730, 26%
## outliers [1]       : 4, 0.028%
## low counts [2]     : 0, 0%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Thyr_Ag vs DMSO 
## Wald test p-value: Condition Thyr_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE       pvalue         padj
##         <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## DIO3      425.248        4.57810 0.1809114 2.40913e-143 3.44529e-139
## KLF9      184.458        2.54011 0.1435680  3.65939e-73  2.61664e-69
## ANGPTL2   867.544        1.63561 0.0923117  2.04703e-72  9.75821e-69
## RTL1      157.662        2.13698 0.1370685  1.36919e-56  4.89520e-53
## GPR158    210.012        1.46976 0.1110114  8.43078e-43  2.41137e-39
## HOXC4     650.767      -35.22998 2.6669486  2.16762e-40  5.16653e-37
  • Genes modulated considering a threshold of 0.05 on the FDR: 6562

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 757

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 147

  • Genes modulated considering a threshold of 0.01 on the FDR: 4874

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 757

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 147

6.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Thyr', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst",  gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols, do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

6.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

names(DEA_Res[[Path]])
## [1] "Agonist"

rm(Res, DEGs)

6.2 Thyroid Inhibitor vs DMSO

6.2.1 DEA

res <- results(dds, contrast=c("Condition","Thyr_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4085, 29%
## LFC < 0 (down)     : 4230, 30%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

6.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Thyr_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4085, 29%
## LFC < 0 (down)     : 4230, 30%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Thyr_Inh vs DMSO 
## Wald test p-value: Condition Thyr_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE       pvalue         padj
##        <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## HSPH1  27065.207        1.94384 0.0403071  0.00000e+00  0.00000e+00
## H2BC5   1200.982        2.17575 0.0547110  0.00000e+00  0.00000e+00
## H2AC6    839.295        2.42805 0.0574605  0.00000e+00  0.00000e+00
## CPT1A    348.222        3.79629 0.1064910 6.81028e-283 2.43348e-279
## CACYBP  5709.990        1.36084 0.0411153 3.20123e-242 9.15104e-239
## DNAJB1 13899.589        2.35420 0.0728212 4.34312e-231 1.03460e-227
  • Genes modulated considering a threshold of 0.05 on the FDR: 7390

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1306

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 417

  • Genes modulated considering a threshold of 0.01 on the FDR: 5930

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1302

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 417

6.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

6.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

names(DEA_Res[[Path]])
## [1] "Agonist"   "Inhibitor"

rm(Res, DEGs)

6.3 Thyroid Agonist vs Inhibitor

6.3.1 DEA

res <- results(dds, contrast=c("Condition","Thyr_Ag", "Thyr_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4371, 31%
## LFC < 0 (down)     : 3984, 28%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

6.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Thyr_Ag", "Thyr_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4371, 31%
## LFC < 0 (down)     : 3984, 28%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Thyr_Ag vs Thyr_Inh 
## Wald test p-value: Condition Thyr_Ag vs Thyr_Inh 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE       pvalue         padj
##        <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## HSPH1  27065.207       -1.54383 0.0440305 8.84871e-271 1.26475e-266
## DNAJB1 13899.589       -2.34126 0.0797788 8.23812e-192 5.88737e-188
## CPT1A    348.222       -3.30314 0.1138710 8.56155e-191 4.07901e-187
## H2AC6    839.295       -1.69938 0.0596465 5.73779e-180 2.05026e-176
## H2BC5   1200.982       -1.65762 0.0583908 1.09408e-178 3.12753e-175
## HSPA1B 27804.866       -2.71360 0.1087292 8.78470e-141 2.09266e-137
  • Genes modulated considering a threshold of 0.05 on the FDR: 7416

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1229

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 324

  • Genes modulated considering a threshold of 0.01 on the FDR: 5842

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1228

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 324

6.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

6.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

names(DEA_Res[[Path]])
## [1] "Agonist"   "Inhibitor" "AgvsInh"

rm(Res, DEGs)

7. Retinoid treatment

7.1 Retinoid Agonist vs DMSO

Path <- Treat[6]
Path
## [1] "Ret"

7.1.1 DEA

res <- results(dds, contrast=c("Condition","Ret_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 5910, 41%
## LFC < 0 (down)     : 5857, 41%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

7.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Ret_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 5917, 41%
## LFC < 0 (down)     : 5858, 41%
## outliers [1]       : 4, 0.028%
## low counts [2]     : 0, 0%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Ret_Ag vs DMSO 
## Wald test p-value: Condition Ret_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE    pvalue      padj
##        <numeric>      <numeric> <numeric> <numeric> <numeric>
## SKAP2    530.176        6.40965 0.1667049         0         0
## TFAP2B 10223.260      -10.57854 0.2574168         0         0
## CDH1     160.089        6.32335 0.1494744         0         0
## PHF21B  6058.039       -1.96075 0.0521792         0         0
## PAX3    5962.763       -3.34975 0.0691134         0         0
## PI15    3660.599       -6.86121 0.1787824         0         0
  • Genes modulated considering a threshold of 0.05 on the FDR: 11332

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 5215

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 2971

  • Genes modulated considering a threshold of 0.01 on the FDR: 10536

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 5186

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 2969

7.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Ret', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:100,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols, show_rownames = TRUE,
      do.scale=TRUE, breaks=0.8)

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:100,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols, show_rownames = TRUE,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

7.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

7.2 Retinoid Antagonist vs DMSO

7.2.1 DEA

res <- results(dds, contrast=c("Condition","Ret_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4872, 34%
## LFC < 0 (down)     : 4858, 34%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

7.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Ret_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4872, 34%
## LFC < 0 (down)     : 4858, 34%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Ret_Inh vs DMSO 
## Wald test p-value: Condition Ret_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE       pvalue         padj
##         <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## HSPA8    90416.79      -1.490623 0.0351983  0.00000e+00  0.00000e+00
## HSPH1    27065.21      -1.346815 0.0412063 3.84108e-236 2.74503e-232
## DNAJA1   23425.52      -0.908372 0.0384361 1.72409e-125 8.21412e-122
## DYNLL1    7330.58      -0.855561 0.0396647 5.35422e-105 1.91320e-101
## HSP90B1  20322.88      -1.127927 0.0524976 1.01398e-104 2.89857e-101
## RNF145    2795.95      -0.707149 0.0357470  2.34581e-88  5.58811e-85
  • Genes modulated considering a threshold of 0.05 on the FDR: 8989

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 2003

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 787

  • Genes modulated considering a threshold of 0.01 on the FDR: 7671

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1998

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 787

7.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

7.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

7.3 Retinoic Agonist vs Inhibitor

7.3.1 DEA

res <- results(dds, contrast=c("Condition","Ret_Ag", "Ret_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 5751, 40%
## LFC < 0 (down)     : 6182, 43%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

7.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Ret_Ag", "Ret_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 5751, 40%
## LFC < 0 (down)     : 6182, 43%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Ret_Ag vs Ret_Inh 
## Wald test p-value: Condition Ret_Ag vs Ret_Inh 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE    pvalue      padj
##        <numeric>      <numeric> <numeric> <numeric> <numeric>
## TFAP2B 10223.260       -9.89270 0.2588611         0         0
## CDH1     160.089        5.71251 0.1515975         0         0
## HSPA8  90416.792        1.91066 0.0384964         0         0
## HSPH1  27065.207        2.48575 0.0447525         0         0
## SH3BP4  6036.265       -1.93641 0.0500167         0         0
## PAX3    5962.763       -3.77108 0.0744285         0         0
  • Genes modulated considering a threshold of 0.05 on the FDR: 11528

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 6119

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 3546

  • Genes modulated considering a threshold of 0.01 on the FDR: 10771

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 6060

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 3541

7.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

7.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

8. LiverX treatment

8.1 LiverX Agonist vs DMSO

Path <- Treat[5]
Path
## [1] "LivX"

8.1.1 DEA

res <- results(dds, contrast=c("Condition","LivX_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1212, 8.5%
## LFC < 0 (down)     : 1299, 9.1%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

8.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "LivX_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1212, 8.5%
## LFC < 0 (down)     : 1298, 9.1%
## outliers [1]       : 4, 0.028%
## low counts [2]     : 0, 0%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition LivX_Ag vs DMSO 
## Wald test p-value: Condition LivX_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE       pvalue         padj
##        <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## ABCG1    820.080       3.508425 0.0885724  0.00000e+00  0.00000e+00
## ABCA1   1963.617       3.663138 0.0746760  0.00000e+00  0.00000e+00
## SREBF1  1859.277       2.823612 0.0792649 1.80327e-278 8.59619e-275
## MYLIP    235.676       2.669199 0.1130551 5.77862e-124 2.06600e-120
## HSPA8  90416.792      -0.692371 0.0350030  2.33041e-88  6.66543e-85
## HSPH1  27065.207      -0.758947 0.0407038  3.87859e-79  9.24462e-76
  • Genes modulated considering a threshold of 0.05 on the FDR: 1721

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 78

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 16

  • Genes modulated considering a threshold of 0.01 on the FDR: 874

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 78

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 16

8.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|LivX', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

8.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

8.2 LiverX Inhibitor vs DMSO

8.2.1 DEA

res <- results(dds, contrast=c("Condition","LivX_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4025, 28%
## LFC < 0 (down)     : 3811, 27%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

8.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "LivX_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4025, 28%
## LFC < 0 (down)     : 3811, 27%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition LivX_Inh vs DMSO 
## Wald test p-value: Condition LivX_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##            baseMean log2FoldChange     lfcSE       pvalue         padj
##           <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## HSPH1     27065.207        1.81859 0.0403021  0.00000e+00  0.00000e+00
## H2AC6       839.295        2.27359 0.0578789  0.00000e+00  0.00000e+00
## HSPA8     90416.792        1.23538 0.0349798 7.49194e-275 3.56941e-271
## HSP90AA1 145932.015        1.89133 0.0540243 4.99450e-271 1.78466e-267
## SREBF1     1859.277       -3.50014 0.1031436 2.86496e-255 8.18976e-252
## ABCA1      1963.617       -3.66168 0.1108632 2.45898e-242 5.85771e-239
  • Genes modulated considering a threshold of 0.05 on the FDR: 6909

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1148

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 351

  • Genes modulated considering a threshold of 0.01 on the FDR: 5351

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1146

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 351

8.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

8.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

8.3 LiverX Agonist vs Inhibitor

8.3.1 DEA

res <- results(dds, contrast=c("Condition","LivX_Ag", "LivX_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4086, 29%
## LFC < 0 (down)     : 4299, 30%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

8.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "LivX_Ag", "LivX_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4086, 29%
## LFC < 0 (down)     : 4299, 30%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition LivX_Ag vs LivX_Inh 
## Wald test p-value: Condition LivX_Ag vs LivX_Inh 
## DataFrame with 6 rows and 5 columns
##           baseMean log2FoldChange     lfcSE    pvalue      padj
##          <numeric>      <numeric> <numeric> <numeric> <numeric>
## FKBP4     12266.28       -1.82559 0.0485294         0         0
## SREBF1     1859.28        6.33916 0.1078036         0         0
## HSP90AB1  89908.54       -1.61258 0.0429221         0         0
## HSPA8     90416.79       -1.93342 0.0383806         0         0
## HSPH1     27065.21       -2.58684 0.0443941         0         0
## ABCG1       820.08        6.64285 0.1364577         0         0
  • Genes modulated considering a threshold of 0.05 on the FDR: 7482

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1480

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 483

  • Genes modulated considering a threshold of 0.01 on the FDR: 5886

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1475

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 483

8.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

8.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

9. Estrogen treatment

9.1 Estrogen Agonist vs DMSO

Path <- Treat[3] 
Path
## [1] "Estr"

9.1.1 DEA

res <- results(dds, contrast=c("Condition","Estr_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 346, 2.4%
## LFC < 0 (down)     : 347, 2.4%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

9.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Estr_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 346, 2.4%
## LFC < 0 (down)     : 347, 2.4%
## outliers [1]       : 4, 0.028%
## low counts [2]     : 0, 0%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Estr_Ag vs DMSO 
## Wald test p-value: Condition Estr_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## HSPH1   27065.21      -0.465322 0.0409830 4.37805e-32 6.26105e-28
## ABCA1    1963.62       0.770184 0.0781306 2.01958e-25 1.44410e-21
## HSPA6    2873.64      -2.835815 0.3242643 2.08779e-19 9.95250e-16
## JMJD6    4217.74      -0.387363 0.0488826 5.24660e-17 1.87579e-13
## HSPA1B  27804.87      -0.759272 0.1040977 4.94417e-16 1.41413e-12
## DNAJB1  13899.59      -0.525151 0.0766760 9.08612e-15 2.16568e-11
  • Genes modulated considering a threshold of 0.05 on the FDR: 420

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 11

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 4

  • Genes modulated considering a threshold of 0.01 on the FDR: 187

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 11

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 4

9.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Estr', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

9.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

9.2 Estrogen Inhibitor vs DMSO

9.2.1 DEA

res <- results(dds, contrast=c("Condition","Estr_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2254, 16%
## LFC < 0 (down)     : 2484, 17%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

9.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Estr_Inh", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2255, 16%
## LFC < 0 (down)     : 2489, 17%
## outliers [1]       : 4, 0.028%
## low counts [2]     : 0, 0%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Estr_Inh vs DMSO 
## Wald test p-value: Condition Estr_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##        baseMean log2FoldChange     lfcSE      pvalue        padj
##       <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## ABCG1   820.080       1.671758 0.0903837 6.20576e-80 8.87485e-76
## H2AC6   839.295       1.097790 0.0606366 6.47468e-75 4.62972e-71
## ABCA1  1963.617       1.285080 0.0757288 9.94974e-67 4.74304e-63
## H2BC5  1200.982       0.820660 0.0569401 4.43743e-48 1.58649e-44
## HOXC4   650.767     -34.145954 2.6655283 6.26323e-38 1.79141e-34
## HSPA5 22792.713      -0.614463 0.0535087 4.25799e-31 1.01489e-27
  • Genes modulated considering a threshold of 0.05 on the FDR: 3754

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 428

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 82

  • Genes modulated considering a threshold of 0.01 on the FDR: 2417

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 428

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 82

9.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Estr', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

9.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

9.3 Estrogen Agonist vs Inhibitor

9.3.1 DEA

res <- results(dds, contrast=c("Condition","Estr_Ag", "Estr_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2380, 17%
## LFC < 0 (down)     : 2124, 15%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

9.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Estr_Ag", "Estr_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2380, 17%
## LFC < 0 (down)     : 2124, 15%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Estr_Ag vs Estr_Inh 
## Wald test p-value: Condition Estr_Ag vs Estr_Inh 
## DataFrame with 6 rows and 5 columns
##        baseMean log2FoldChange     lfcSE      pvalue        padj
##       <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## H2BC5  1200.982      -1.037274 0.0643912 1.47184e-60 2.10370e-56
## H2AC6   839.295      -0.901371 0.0663335 2.19074e-43 1.56561e-39
## HSPH1 27065.207      -0.597584 0.0444523 7.38022e-42 3.51618e-38
## H2BC8   306.081      -1.590668 0.1282862 6.75370e-40 2.41327e-36
## HSPA6  2873.635      -4.308023 0.3630459 7.97991e-36 2.28114e-32
## ABCG1   820.080      -1.079099 0.0981065 9.01481e-31 2.14748e-27
  • Genes modulated considering a threshold of 0.05 on the FDR: 3565

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 304

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 58

  • Genes modulated considering a threshold of 0.01 on the FDR: 2178

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 304

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 58

9.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

9.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

10. Androgen treatment

10.1 Androgen Agonist vs DMSO

Path <- Treat[2]
Path
## [1] "Andr"

10.1.1 DEA

res <- results(dds, contrast=c("Condition","Andr_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4043, 28%
## LFC < 0 (down)     : 4129, 29%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

10.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Andr_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4043, 28%
## LFC < 0 (down)     : 4132, 29%
## outliers [1]       : 4, 0.028%
## low counts [2]     : 0, 0%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Andr_Ag vs DMSO 
## Wald test p-value: Condition Andr_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##           baseMean log2FoldChange     lfcSE    pvalue      padj
##          <numeric>      <numeric> <numeric> <numeric> <numeric>
## FKBP4     12266.28        1.89437 0.0441213         0         0
## HSP90AA1 145932.01        2.50704 0.0539842         0         0
## HSP90AB1  89908.54        1.74126 0.0392218         0         0
## AHSA1      7374.07        1.56965 0.0407279         0         0
## DNAJB6     7435.81        1.11694 0.0295229         0         0
## HSPA8     90416.79        1.43222 0.0350118         0         0
  • Genes modulated considering a threshold of 0.05 on the FDR: 7212

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 974

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 242

  • Genes modulated considering a threshold of 0.01 on the FDR: 5664

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 974

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 242

10.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Andr', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

10.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

10.2 Androgen Inhibitor vs DMSO

10.2.1 DEA

res <- results(dds, contrast=c("Condition","Andr_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3028, 21%
## LFC < 0 (down)     : 2699, 19%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

10.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Andr_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3028, 21%
## LFC < 0 (down)     : 2699, 19%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Andr_Inh vs DMSO 
## Wald test p-value: Condition Andr_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE      pvalue        padj
##         <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## JMJD6     4217.74      -0.906308 0.0502342 1.67154e-75 2.38913e-71
## H2BC5     1200.98      -0.940788 0.0676905 1.18202e-46 8.44733e-43
## HSPH1    27065.21      -0.549985 0.0409838 1.00067e-42 4.76754e-39
## RBM3      2962.18       0.529104 0.0423411 1.32514e-37 4.73505e-34
## HERPUD1   3170.90      -0.636139 0.0553832 3.15815e-32 9.02790e-29
## FUS      18901.35       0.452030 0.0435037 1.67037e-27 3.97909e-24
  • Genes modulated considering a threshold of 0.05 on the FDR: 4671

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 163

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 31

  • Genes modulated considering a threshold of 0.01 on the FDR: 3009

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 163

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 31

10.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

10.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

10.3 Androgen Agonist vs Inhibitor

10.3.1 DEA

res <- results(dds, contrast=c("Condition","Andr_Ag", "Andr_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4437, 31%
## LFC < 0 (down)     : 4539, 32%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

10.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Andr_Ag", "Andr_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4437, 31%
## LFC < 0 (down)     : 4539, 32%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Andr_Ag vs Andr_Inh 
## Wald test p-value: Condition Andr_Ag vs Andr_Inh 
## DataFrame with 6 rows and 5 columns
##           baseMean log2FoldChange     lfcSE    pvalue      padj
##          <numeric>      <numeric> <numeric> <numeric> <numeric>
## FKBP4     12266.28        2.16769 0.0486123         0         0
## JMJD6      4217.74        2.16313 0.0537576         0         0
## HSP90AA1 145932.01        2.80136 0.0592711         0         0
## HSP90AB1  89908.54        1.90956 0.0429571         0         0
## AHSA1      7374.07        1.83104 0.0450324         0         0
## DNAJB6     7435.81        1.32446 0.0327931         0         0
  • Genes modulated considering a threshold of 0.05 on the FDR: 8181

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1825

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 550

  • Genes modulated considering a threshold of 0.01 on the FDR: 6806

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1809

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 550

10.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

10.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

11. Glucocorticoid treatment

11.1 Glucocorticoid Agonist vs DMSO

Path <- Treat[4]
Path
## [1] "GC"

11.1.1 DEA

res <- results(dds, contrast=c("Condition","GC_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1517, 11%
## LFC < 0 (down)     : 1495, 10%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

11.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "GC_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1515, 11%
## LFC < 0 (down)     : 1495, 10%
## outliers [1]       : 4, 0.028%
## low counts [2]     : 0, 0%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition GC_Ag vs DMSO 
## Wald test p-value: Condition GC_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE       pvalue         padj
##         <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## HSPA8    90416.79      -1.319637 0.0351224  0.00000e+00  0.00000e+00
## HSPH1    27065.21      -1.218000 0.0409688 3.11077e-196 2.22436e-192
## HSPA5    22792.71      -1.334309 0.0538253 7.58495e-138 3.61574e-134
## PDIA4     6259.54      -0.940970 0.0483612  3.54799e-86  1.26850e-82
## HSP90B1  20322.88      -1.003832 0.0521580  2.07369e-84  5.93117e-81
## FKBP4    12266.28      -0.840932 0.0449425  6.16916e-80  1.47042e-76
  • Genes modulated considering a threshold of 0.05 on the FDR: 2276

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 84

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 19

  • Genes modulated considering a threshold of 0.01 on the FDR: 1270

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 84

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 19

11.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|GC', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

11.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

11.2 Glucocorticoid Inhibitor vs DMSO

11.2.1 DEA

res <- results(dds, contrast=c("Condition","GC_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4068, 28%
## LFC < 0 (down)     : 4164, 29%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

11.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "GC_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4068, 28%
## LFC < 0 (down)     : 4164, 29%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition GC_Inh vs DMSO 
## Wald test p-value: Condition GC_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##        baseMean log2FoldChange     lfcSE       pvalue         padj
##       <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## HSPH1 27065.207        1.81282 0.0402539  0.00000e+00  0.00000e+00
## H2AC6   839.295        2.17645 0.0575191  0.00000e+00  0.00000e+00
## CPT1A   348.222        3.49757 0.1063571 7.72917e-241 3.68243e-237
## HSPA8 90416.792        1.11784 0.0349804 1.75319e-225 6.26459e-222
## HSPA4 12834.121        0.98296 0.0314856 3.77683e-215 1.07964e-211
## H2BC5  1200.982        1.68967 0.0550127 8.32191e-209 1.98242e-205
  • Genes modulated considering a threshold of 0.05 on the FDR: 7401

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1253

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 437

  • Genes modulated considering a threshold of 0.01 on the FDR: 5976

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1251

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 437

11.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

11.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

11.3 Glucocorticoid Agonist vs Inhibitor

11.3.1 DEA

res <- results(dds, contrast=c("Condition","GC_Ag", "GC_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4336, 30%
## LFC < 0 (down)     : 4216, 29%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

11.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "GC_Ag", "GC_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4336, 30%
## LFC < 0 (down)     : 4216, 29%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition GC_Ag vs GC_Inh 
## Wald test p-value: Condition GC_Ag vs GC_Inh 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE    pvalue      padj
##        <numeric>      <numeric> <numeric> <numeric> <numeric>
## FKBP4   12266.28       -2.10078 0.0486893         0         0
## DNAJA1  23425.52       -1.56909 0.0416437         0         0
## AHSA1    7374.07       -1.79771 0.0449504         0         0
## HSPA8   90416.79       -2.44169 0.0384249         0         0
## CACYBP   5709.99       -1.83065 0.0454716         0         0
## HSPH1   27065.21       -3.03730 0.0445834         0         0
  • Genes modulated considering a threshold of 0.05 on the FDR: 7714

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1532

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 544

  • Genes modulated considering a threshold of 0.01 on the FDR: 6252

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1523

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 544

11.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

11.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

12. Arhyl Hydrocarbon treatment

12.1 Arhyl Hydrocarbon Agonist vs DMSO

Path <- Treat[1]
Path
## [1] "AhHyd"

12.1.1 DEA

res <- results(dds, contrast=c("Condition","AhHyd_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4271, 30%
## LFC < 0 (down)     : 4265, 30%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

12.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "AhHyd_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4272, 30%
## LFC < 0 (down)     : 4268, 30%
## outliers [1]       : 4, 0.028%
## low counts [2]     : 0, 0%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition AhHyd_Ag vs DMSO 
## Wald test p-value: Condition AhHyd_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## H3-3B   19403.26       0.713773 0.0416281 2.58662e-68 3.69913e-64
## ARID2    2916.44      -0.733805 0.0472585 6.36880e-57 4.55401e-53
## ARRDC3   9537.29       0.749603 0.0510374 1.71450e-51 8.17304e-48
## PDIA4    6259.54       0.662324 0.0475460 3.53658e-46 1.26441e-42
## SC5D     6086.77      -0.713442 0.0519291 2.29676e-45 6.56919e-42
## FBXO30   2422.05      -0.772746 0.0565896 2.95442e-45 7.04187e-42
  • Genes modulated considering a threshold of 0.05 on the FDR: 7633

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 516

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 76

  • Genes modulated considering a threshold of 0.01 on the FDR: 6057

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 516

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 76

12.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|AhHyd', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

12.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

12.2 Arhyl Hydrocarbon Inhibitor vs DMSO

12.2.1 DEA

res <- results(dds, contrast=c("Condition","AhHyd_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4451, 31%
## LFC < 0 (down)     : 4315, 30%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

12.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "AhHyd_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4451, 31%
## LFC < 0 (down)     : 4315, 30%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition AhHyd_Inh vs DMSO 
## Wald test p-value: Condition AhHyd_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE       pvalue         padj
##        <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## HSPA8   90416.79      -1.346595 0.0351719  0.00000e+00  0.00000e+00
## HSPH1   27065.21      -1.436492 0.0413204 8.86039e-267 6.33208e-263
## HSPA5   22792.71      -1.196677 0.0538258 8.49885e-111 4.04913e-107
## JMJD6    4217.74      -1.016166 0.0498247  1.75437e-93  6.26882e-90
## RBM3     2962.18       0.805775 0.0417675  4.16093e-84  1.18944e-80
## DNAJA1  23425.52      -0.728056 0.0384298  1.96303e-81  4.67626e-78
  • Genes modulated considering a threshold of 0.05 on the FDR: 7923

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1234

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 363

  • Genes modulated considering a threshold of 0.01 on the FDR: 6351

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1233

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 363

12.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

12.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

12.3 Arhyl Hydrocarbon Agonist vs Inhibitor

12.3.1 DEA

res <- results(dds, contrast=c("Condition","AhHyd_Ag", "AhHyd_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3753, 26%
## LFC < 0 (down)     : 4397, 31%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

12.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "AhHyd_Ag", "AhHyd_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14305 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3753, 26%
## LFC < 0 (down)     : 4397, 31%
## outliers [1]       : 12, 0.084%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition AhHyd_Ag vs AhHyd_Inh 
## Wald test p-value: Condition AhHyd_Ag vs AhHyd_Inh 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE       pvalue         padj
##         <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## HSPA8     90416.8        1.61128 0.0384766  0.00000e+00  0.00000e+00
## HSPH1     27065.2        1.80124 0.0450304  0.00000e+00  0.00000e+00
## HSPA5     22792.7        1.89718 0.0588832 4.33496e-230 2.06532e-226
## FKBP4     12266.3        1.22264 0.0488458 2.20576e-139 7.88172e-136
## HSP90B1   20322.9        1.39746 0.0569125 1.59091e-134 4.54779e-131
## CALR      19128.8        1.27685 0.0538480 1.76612e-125 4.20720e-122
  • Genes modulated considering a threshold of 0.05 on the FDR: 7200

  • Genes modulated considering a threshold of 0.05 on the FDR and of 1.5 on the FC: 1274

  • Genes modulated considering a threshold of 0.05 on the FDR and of 2 on the FC: 390

  • Genes modulated considering a threshold of 0.01 on the FDR: 5568

  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1262

  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 390

12.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm::sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm::sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",  top_annotation=c('Condition'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

DESeq2::plotMA(Res, ylim=c(-4,4))

Counts

plot_list = list()

for(i in 1:12){
  d <-plotCounts(dds, gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition",  returnData=TRUE)
  p <- ggplot(d, aes(x = Condition, y = count, color = Condition)) +
    geom_point(position=position_jitter(w = 0.1,h = 0)) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_bw() + ggtitle(row.names(Res[order(Res$padj), ])[i]) + theme(plot.title = element_text(hjust = 0.5))
  plot_list[[i]] <- p
}
grid.arrange(grobs = plot_list)

#  par(mfrow=c(3,4))
# 
# #grepl(Path, colnames(SE_DEA))
# 
# for(i in 1:12){
#   plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
# }

12.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

13. Savings

saveRDS(DEA_Res, paste0(OutputFolder, '/', 'DEARes.rds'))
saveRDS(SE_DEA, paste0(OutputFolder, '/', 'SE_DEA.rds'))
SessionInfo <- sessionInfo()
Date <- date()
save.image(paste0(OutputFolder, '/', 'DEA_CTL04.RData'))
Date
## [1] "Fri Jul 18 14:07:31 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] gridExtra_2.3               ashr_2.2-54                
##  [3] sechm_1.6.0                 ggplot2_3.4.1              
##  [5] DESeq2_1.38.3               SummarizedExperiment_1.28.0
##  [7] Biobase_2.58.0              MatrixGenerics_1.10.0      
##  [9] matrixStats_0.63.0          GenomicRanges_1.50.2       
## [11] GenomeInfoDb_1.34.9         IRanges_2.32.0             
## [13] S4Vectors_0.36.1            BiocGenerics_0.44.0        
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7           bit64_4.0.5            doParallel_1.0.17     
##  [4] RColorBrewer_1.1-3     httr_1.4.5             tools_4.2.1           
##  [7] bslib_0.4.2            irlba_2.3.5.1          utf8_1.2.3            
## [10] R6_2.5.1               DBI_1.1.3              colorspace_2.1-0      
## [13] GetoptLong_1.0.5       withr_2.5.0            tidyselect_1.2.0      
## [16] bit_4.0.5              curl_5.0.0             compiler_4.2.1        
## [19] cli_3.6.1              TSP_1.2-2              DelayedArray_0.24.0   
## [22] labeling_0.4.2         sass_0.4.5             scales_1.2.1          
## [25] SQUAREM_2021.1         mixsqp_0.3-48          stringr_1.5.0         
## [28] digest_0.6.31          rmarkdown_2.20         ca_0.71.1             
## [31] XVector_0.38.0         pkgconfig_2.0.3        htmltools_0.5.4       
## [34] highr_0.10             invgamma_1.1           fastmap_1.1.1         
## [37] rlang_1.1.1            GlobalOptions_0.1.2    rstudioapi_0.14       
## [40] RSQLite_2.3.0          farver_2.1.1           shape_1.4.6           
## [43] jquerylib_0.1.4        generics_0.1.3         jsonlite_1.8.4        
## [46] BiocParallel_1.32.5    dplyr_1.1.0            RCurl_1.98-1.10       
## [49] magrittr_2.0.3         GenomeInfoDbData_1.2.9 Matrix_1.5-3          
## [52] Rcpp_1.0.10            munsell_0.5.0          fansi_1.0.4           
## [55] lifecycle_1.0.3        stringi_1.7.12         yaml_2.3.7            
## [58] zlibbioc_1.44.0        Rtsne_0.16             grid_4.2.1            
## [61] blob_1.2.3             parallel_4.2.1         randomcoloR_1.1.0.1   
## [64] crayon_1.5.2           lattice_0.20-45        Biostrings_2.66.0     
## [67] annotate_1.76.0        circlize_0.4.15        KEGGREST_1.38.0       
## [70] locfit_1.5-9.7         knitr_1.42             ComplexHeatmap_2.14.0 
## [73] pillar_1.8.1           rjson_0.2.21           geneplotter_1.76.0    
## [76] codetools_0.2-19       XML_3.99-0.13          glue_1.6.2            
## [79] evaluate_0.20          V8_4.2.2               png_0.1-8             
## [82] vctrs_0.6.2            foreach_1.5.2          gtable_0.3.1          
## [85] clue_0.3-64            cachem_1.0.7           xfun_0.37             
## [88] xtable_1.8-4           truncnorm_1.0-8        seriation_1.4.1       
## [91] tibble_3.2.1           iterators_1.0.14       registry_0.5-1        
## [94] AnnotationDbi_1.60.0   memoise_2.0.1          cluster_2.1.4