Differential expression analysis on CTL08.

1. Environment Set Up

Values of RMarkdown parameters

for (i in 1:length(params))
  print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset  - Value: CTL08 - Class: character"
## [1] "Parameter: SE  - Value: ~/DataDir/bulkRNASeq/3.DataExploration/CTL08/Output/AfterSamplesSelection/MaleLineSE_Bio.rds - Class: character"
## [1] "Parameter: OutputFolder  - Value: ~/DataDir/bulkRNASeq/5.DifferentialExpression/CTL08/ - Class: character"
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(SummarizedExperiment)
library(ggplot2)
library(sechm)
library(ashr)
Dataset <- params$Dataset
OutputFolder <- params$OutputFolder

#if (dir.exists(OutputFolder) == FALSE) {
#  dir.create(OutputFolder, recursive=FALSE)
#}

2. Data Upload

2.1 SE object

SE object created from exploratory analysis. Steps already performed: gene selection according to biotype.

SE_Bio <- readRDS(params$SE)

2.2 Discard duplicated gene names

SE_Bio <- SE_Bio[!duplicated(rowData(SE_Bio)$GeneName), ]
rownames(SE_Bio) <- rowData(SE_Bio)$GeneName

2.3 Sample selection

I am starting from a SE object in which low-quality data have already been discarded.

SE object containing information about 19892 genes in 51 samples.


3. Generation of dds

For the design

  • Condition to specify treatment
  • SeqRun as co-variable (as categorical variable)
CountMatrix <- assays(SE_Bio)$counts
SampleMeta <- DataFrame(colData(SE_Bio))

SampleMeta$SeqRun <- as.factor(as.character(SampleMeta$SeqRun))

if(! all(rownames(SampleMeta) == colnames(CountMatrix))){
  stop('Inconsistency in count matrix and sample metadata')
}
dds <- DESeqDataSetFromMatrix(countData=assays(SE_Bio)$counts, SampleMeta, ~ SeqRun + Condition)
## converting counts to integer mode

# to add gene metadata
mcols(dds) <- DataFrame(mcols(dds), DataFrame(rowData(SE_Bio)))

dds
## class: DESeqDataSet 
## dim: 19892 51 
## metadata(1): version
## assays(1): counts
## rownames(19892): TSPAN6 TNMD ... AC114402.3 AC084756.2
## rowData names(9): Gene EnsGene ... Start End
## colnames(51): S32829_H12_DMSO S32830_H14_Ret_Ag ... MTR38_S44_AhHyd_Ag
##   MTR39_S49_AhHyd_Ag
## colData names(19): InternalUniqueID HRID ... SeqPlatform lib.size

4. Gene filtering

4.1 Keep only protein-coding genes

keep <- mcols(dds)$GeneBiotype=="protein_coding"
table(keep)
## keep
##  TRUE 
## 19892
dds <- dds[keep,]

4.2 Discard very lowly expressed genes

keep <- rowSums(counts(dds)>=30) >= 10

table(keep)
## keep
## FALSE  TRUE 
##  5042 14850
dds <- dds[keep,]

dds
## class: DESeqDataSet 
## dim: 14850 51 
## metadata(1): version
## assays(1): counts
## rownames(14850): TSPAN6 TNMD ... AC013477.2 AC005192.1
## rowData names(9): Gene EnsGene ... Start End
## colnames(51): S32829_H12_DMSO S32830_H14_Ret_Ag ... MTR38_S44_AhHyd_Ag
##   MTR39_S49_AhHyd_Ag
## colData names(19): InternalUniqueID HRID ... SeqPlatform lib.size

A dds object containing information about 14850 genes in 51 samples is tested for differential expression.


5. Standard DEA

5.1 Size Factors

dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument with nbinomWaldTest
sizeFactors(dds)
##        S32829_H12_DMSO      S32830_H14_Ret_Ag      S32831_H15_Ret_Ag 
##              0.7552176              1.3554679              1.5570976 
##    S32833_H27_Thyr_Inh    S32837_H35_LivX_Inh    S32838_H36_LivX_Inh 
##              1.6418382              1.3202859              1.6472661 
##     S32839_H38_LivX_Ag     S32840_H39_LivX_Ag     S32841_H41_Estr_Ag 
##              1.2597147              0.8509019              1.1332137 
##     S32842_H42_Estr_Ag     S32845_H51_Andr_Ag    S32846_H52_Andr_Inh 
##              1.0085647              1.0751976              1.7938036 
##    S32847_H53_Andr_Inh    S32848_H54_Andr_Inh      S32849_H60_GC_Inh 
##              1.4488644              0.6893375              1.7261740 
##   S32853_H68_AhHyd_Inh   S32854_H69_AhHyd_Inh     S32857_H22_Thyr_Ag 
##              1.8872572              0.6474289              1.3064348 
##    S32858_H25_Thyr_Inh         S32860_H9_DMSO     S32861_H17_Ret_Inh 
##              1.3466527              1.1099733              1.5767777 
##     S32862_H18_Ret_Inh      S32864_H13_Ret_Ag     S32865_H23_Thyr_Ag 
##              0.7835735              0.5567720              0.8261559 
##       S35870_MTR3_DMSO   S35872_MTR5_Thyr_Inh   S35875_MTR8_LivX_Inh 
##              0.6197050              1.1566085              1.2762796 
##    S35876_MTR9_LivX_Ag   S35879_MTR12_Andr_Ag    S35881_MTR14_GC_Inh 
##              1.2051410              0.9977699              1.2717297 
## S35882_MTR15_AhHyd_Inh          MTR16_S34_CTL          MTR17_S38_CTL 
##              1.2979921              0.6883420              0.8858210 
##          MTR19_S41_CTL          MTR20_S48_CTL          MTR21_S29_CTL 
##              0.9645505              0.5918533              0.9678453 
##         MTR22_S46_DMSO         MTR23_S28_DMSO         MTR24_S51_DMSO 
##              0.8385289              0.8225395              0.7630286 
##         MTR25_S43_DMSO         MTR26_S33_DMSO      MTR27_S53_Thyr_Ag 
##              0.7893820              0.7321577              0.8748039 
##      MTR28_S50_Thyr_Ag      MTR29_S30_Andr_Ag      MTR30_S42_Andr_Ag 
##              0.8699931              0.9699616              0.9857005 
##        MTR31_S47_GC_Ag        MTR32_S35_GC_Ag        MTR33_S37_GC_Ag 
##              0.9524663              0.8375731              0.5889336 
##     MTR37_S52_AhHyd_Ag     MTR38_S44_AhHyd_Ag     MTR39_S49_AhHyd_Ag 
##              0.8053541              0.8512655              0.7365260
SF <-data.frame(Sample=names(sizeFactors(dds)), SizeF=sizeFactors(dds))

ggplot(SF, aes(Sample, SizeF)) +
  geom_point() + guides(x =  guide_axis(angle = 90)) +
  ylim(0, 1.5)

5.2 Vst Transformation

SE_DEA <- as(dds, "RangedSummarizedExperiment")
assays(SE_DEA)$vst <- assay(vst(dds, blind=TRUE))
metadata(SE_DEA)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3', 
                 'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
                 'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',  
                 "Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050', 
                 'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
                 'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50', 
                 'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50', 
                 'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
                 ), SeqRun = c("20210310" = "orange", "20210724"  = "green4", "20220422" = 
                                          "lightblue"))

5.3 Create List to store results

Treat <- unique(SE_Bio$Pathway)[-1]

Treat
## [1] "Ret"   "Thyr"  "LivX"  "Estr"  "Andr"  "GC"    "AhHyd"
DEA_Res <- list()

for(i in 1:length(Treat)){
  DEA_Res[[i]] <- list()}

names(DEA_Res) <- Treat

6. Thyroid treatment

6.1 Thyroid Agonist vs DMSO

Path <- Treat[2]

6.1.1 DEA

res <- results(dds, contrast=c("Condition","Thyr_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90, independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1274, 8.6%
## LFC < 0 (down)     : 668, 4.5%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

6.1.2 Fold-change shrinkage

Apply a shrinkage of the fold-change to try to minimize outlier value influence. Considring that I am using the constrast option to retrieve the results, I cannot rely on apleglm default algorithm. Since at the moment I am not using the lfcThreshold option, I decide for the ashr algorithm.

Res <- lfcShrink(dds, contrast=c("Condition", "Thyr_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1275, 8.6%
## LFC < 0 (down)     : 668, 4.5%
## outliers [1]       : 2, 0.013%
## low counts [2]     : 0, 0%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Thyr_Ag vs DMSO 
## Wald test p-value: Condition Thyr_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## H1-2   2271.6009      -3.057333  0.408414 2.16552e-16 3.21537e-12
## H2BC5  2477.4868      -1.483131  0.196168 9.06357e-15 6.72879e-11
## PDK1    753.5557       1.290057  0.187980 1.76920e-12 8.75637e-09
## H2BC21 4323.4556      -0.936279  0.156468 3.39500e-11 1.26022e-07
## SCGN     84.2063       5.449211  0.976115 1.44749e-10 4.29845e-07
## H2AC11  288.3908      -1.222039  0.199996 1.74069e-10 4.30763e-07
  • Genes modulated considering a threshold of 0.01 on the FDR: 280
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 215
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 164

6.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Thyr', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

6.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

names(DEA_Res[[Path]])
## [1] "Agonist"
rm(Res, DEGs)

6.2 Thyroid Inhibitor vs DMSO

6.2.1 DEA

res <- results(dds, contrast=c("Condition","Thyr_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2555, 17%
## LFC < 0 (down)     : 2419, 16%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

6.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Thyr_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2555, 17%
## LFC < 0 (down)     : 2419, 16%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Thyr_Inh vs DMSO 
## Wald test p-value: Condition Thyr_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## SAPCD1   92.7142       -3.46684  0.479136 8.12463e-15 1.20488e-10
## CPT1A   812.8093        2.81992  0.402110 6.35518e-14 4.35877e-10
## DNAJB1 7763.6368        1.96212  0.326263 9.57953e-14 4.35877e-10
## JUNB    550.4433        3.76525  0.563498 1.17566e-13 4.35877e-10
## ZSCAN1  306.9018       -2.39878  0.368148 1.85728e-13 4.59058e-10
## H2BC8   546.9718        2.04666  0.339291 1.65834e-13 4.59058e-10
  • Genes modulated considering a threshold of 0.01 on the FDR: 2234
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1842
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 458

6.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

6.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

names(DEA_Res[[Path]])
## [1] "Agonist"   "Inhibitor"
rm(Res, DEGs)

6.3Thyroid Agonist vs Inhibitor

6.3.1 DEA

res <- results(dds, contrast=c("Condition","Thyr_Ag", "Thyr_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1899, 13%
## LFC < 0 (down)     : 2077, 14%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

6.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Thyr_Ag", "Thyr_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1899, 13%
## LFC < 0 (down)     : 2077, 14%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Thyr_Ag vs Thyr_Inh 
## Wald test p-value: Condition Thyr_Ag vs Thyr_Inh 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## H2BC5  2477.4868       -2.81169  0.255617 3.87345e-29 5.74433e-25
## H2BC8   546.9718       -3.49647  0.325289 1.52477e-28 1.13062e-24
## H2BC7    87.6608       -3.73703  0.372256 1.40452e-25 6.94301e-22
## HSPA6   653.5852       -6.82386  0.858327 6.27416e-22 2.32614e-18
## H2BC21 4323.4556       -1.70356  0.206361 8.64879e-21 2.56523e-17
## H4C5    130.8932       -3.44278  0.387489 1.76435e-20 4.36089e-17
  • Genes modulated considering a threshold of 0.01 on the FDR: 1479
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1249
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 394

6.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

6.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

names(DEA_Res[[Path]])
## [1] "Agonist"   "Inhibitor" "AgvsInh"
rm(Res, DEGs)

7. Retinoid treatment

7.1 Retinoid Agonist vs DMSO

Path <- Treat[1]

7.1.1 DEA

res <- results(dds, contrast=c("Condition","Ret_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3933, 26%
## LFC < 0 (down)     : 4092, 28%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

7.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Ret_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3935, 26%
## LFC < 0 (down)     : 4095, 28%
## outliers [1]       : 2, 0.013%
## low counts [2]     : 0, 0%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Ret_Ag vs DMSO 
## Wald test p-value: Condition Ret_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE       pvalue         padj
##         <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## ZIC2    10205.434       -8.52948  0.291056 2.33755e-191 3.47080e-187
## ZIC5     3033.439       -8.57231  0.477866  1.29000e-74  9.57693e-71
## SKAP2     466.358        5.37693  0.333708  1.16388e-61  5.76044e-58
## AFMID     721.995       -2.90741  0.187198  1.53227e-55  5.68778e-52
## SLC6A11   664.601        4.62415  0.305635  2.65371e-54  7.88044e-51
## ZIC3     5794.338       -6.23369  0.430732  3.68138e-51  9.11020e-48
  • Genes modulated considering a threshold of 0.01 on the FDR: 5247
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 4745
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 3046

7.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Ret', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

7.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

7.2 Retinoid Antagonist vs DMSO

7.2.1 DEA

res <- results(dds, contrast=c("Condition","Ret_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 72, 0.48%
## LFC < 0 (down)     : 146, 0.98%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

7.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Ret_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 72, 0.48%
## LFC < 0 (down)     : 146, 0.98%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Ret_Inh vs DMSO 
## Wald test p-value: Condition Ret_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##             baseMean log2FoldChange     lfcSE      pvalue        padj
##            <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## H2BC5       2477.487      -1.402637  0.408671 6.33339e-09 9.39242e-05
## H1-2        2271.601      -2.564610  0.615291 3.61033e-08 2.67706e-04
## CHORDC1     2217.862      -0.675637  0.221673 1.09383e-07 3.24430e-04
## AP3S1       2421.126      -0.600916  0.150068 9.08135e-08 3.24430e-04
## PDCD6-AHRR   121.047     -20.073776  6.157243 6.63782e-08 3.24430e-04
## FP565260.6   563.082      -1.385639  0.562029 1.67777e-07 4.14688e-04
  • Genes modulated considering a threshold of 0.01 on the FDR: 15
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 12
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 8

7.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

7.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

7.3 Retinoic Agonist vs Inhibitor

7.3.1 DEA

res <- results(dds, contrast=c("Condition","Ret_Ag", "Ret_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3394, 23%
## LFC < 0 (down)     : 2831, 19%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

7.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Ret_Ag", "Ret_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3394, 23%
## LFC < 0 (down)     : 2831, 19%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Ret_Ag vs Ret_Inh 
## Wald test p-value: Condition Ret_Ag vs Ret_Inh 
## DataFrame with 6 rows and 5 columns
##        baseMean log2FoldChange     lfcSE       pvalue         padj
##       <numeric>      <numeric> <numeric>    <numeric>    <numeric>
## ZIC2  10205.434       -7.52648  0.337442 1.12724e-115 1.67170e-111
## ZIC5   3033.439       -7.59036  0.542526  8.98744e-50  6.66419e-46
## ITGA5   791.386       -2.63345  0.188107  3.48037e-45  1.72046e-41
## AFMID   721.995       -2.82942  0.218083  3.03930e-39  1.12682e-35
## SKAP2   466.358        4.95614  0.394580  1.81437e-38  5.38141e-35
## ZIC3   5794.338       -6.14793  0.513857  4.64307e-37  1.14761e-33
  • Genes modulated considering a threshold of 0.01 on the FDR: 3621
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 3409
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 2425

7.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

7.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

8. LiverX treatment

8.1 LiverX Agonist vs DMSO

Path <- Treat[3]

8.1.1 DEA

res <- results(dds, contrast=c("Condition","LivX_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2780, 19%
## LFC < 0 (down)     : 2993, 20%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

8.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "LivX_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2782, 19%
## LFC < 0 (down)     : 2992, 20%
## outliers [1]       : 2, 0.013%
## low counts [2]     : 0, 0%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition LivX_Ag vs DMSO 
## Wald test p-value: Condition LivX_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## AMIGO3  144.7007      -22.40703  1.719571 4.95715e-39 7.36037e-35
## LBX1    173.5543      -20.80166  2.253478 1.73219e-20 1.28598e-16
## ABCA1  1677.0892        2.91962  0.347635 3.67117e-20 1.81698e-16
## SREBF1 2029.5477        1.56724  0.203640 3.88429e-17 1.44185e-13
## ABCG1  1468.2671        2.54756  0.374515 2.19151e-14 5.42326e-11
## SAPCD1   92.7142       -3.23344  0.525797 1.85374e-14 5.42326e-11
  • Genes modulated considering a threshold of 0.01 on the FDR: 2677
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 2281
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 833

8.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|LivX', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

8.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

8.2 LiverX Inhibitor vs DMSO

8.2.1 DEA

res <- results(dds, contrast=c("Condition","LivX_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 728, 4.9%
## LFC < 0 (down)     : 888, 6%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

8.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "LivX_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 728, 4.9%
## LFC < 0 (down)     : 888, 6%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition LivX_Inh vs DMSO 
## Wald test p-value: Condition LivX_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## SREBF1  2029.548      -1.964511  0.199743 3.24981e-24 4.81946e-20
## ABCG1   1468.267      -3.298157  0.393789 1.35238e-20 1.00279e-16
## ABCA1   1677.089      -3.042843  0.360180 2.26313e-20 1.11874e-16
## HOXA3    285.717     -21.296114  3.342062 9.98135e-11 3.70058e-07
## LPCAT3   700.781      -0.842162  0.176047 1.54263e-09 4.57543e-06
## H2BC5   2477.487      -1.105675  0.287412 2.58503e-08 6.38932e-05
  • Genes modulated considering a threshold of 0.01 on the FDR: 170
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 126
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 46

8.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

8.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

8.3 LiverX Agonist vs Inhibitor

8.3.1 DEA

res <- results(dds, contrast=c("Condition","LivX_Ag", "LivX_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2412, 16%
## LFC < 0 (down)     : 1971, 13%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

8.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "LivX_Ag", "LivX_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2412, 16%
## LFC < 0 (down)     : 1971, 13%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition LivX_Ag vs LivX_Inh 
## Wald test p-value: Condition LivX_Ag vs LivX_Inh 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## SREBF1  2029.548        3.59651  0.226002 8.13030e-65 1.20572e-60
## ABCA1   1677.089        6.58765  0.392549 1.38975e-63 1.03050e-59
## ABCG1   1468.267        6.55674  0.425904 7.37881e-54 3.64759e-50
## AMIGO3   144.701      -23.01500  1.798405 6.22626e-38 2.30839e-34
## LPCAT3   700.781        1.57681  0.177063 1.31240e-20 3.89259e-17
## H2BC5   2477.487        1.98751  0.256835 7.82156e-17 1.93323e-13
  • Genes modulated considering a threshold of 0.01 on the FDR: 1437
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 1278
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 614

8.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

8.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

9. Estrogen treatment

9.1 Estrogen Agonist vs DMSO

Path <- Treat[4]
Path
## [1] "Estr"

9.1.1 DEA

res <- results(dds, contrast=c("Condition","Estr_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3457, 23%
## LFC < 0 (down)     : 2719, 18%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

9.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Estr_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3459, 23%
## LFC < 0 (down)     : 2719, 18%
## outliers [1]       : 2, 0.013%
## low counts [2]     : 0, 0%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Estr_Ag vs DMSO 
## Wald test p-value: Condition Estr_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##           baseMean log2FoldChange     lfcSE      pvalue        padj
##          <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## ACKR3      990.134       -3.66417  0.373876 8.92705e-25 6.62745e-21
## SFRP2     1277.511       -6.25568  0.707080 4.61145e-25 6.62745e-21
## ANKRD33B   478.485       -4.51824  0.535963 4.16189e-20 2.05986e-16
## PDLIM3    1374.231       -3.90408  0.458344 8.31445e-20 3.08633e-16
## FGFR3     7653.864       -7.24816  0.955405 1.76035e-19 5.22753e-16
## ATP6V0A1  7811.837        1.65205  0.211081 8.87560e-17 2.19642e-13
  • Genes modulated considering a threshold of 0.01 on the FDR: 3102
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 2860
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 1702

9.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Estr', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

9.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

9.2 Estrogen Inhibitor vs DMSO

No samples for this treatment are left after the QC.

9.3 Estrogen Agonist vs Inhibitor

Not possible for the lack of Inhibitor-treated samples.


10. Androgen treatment

10.1 Androgen Agonist vs DMSO

Path <- Treat[5]
Path
## [1] "Andr"

10.1.1 DEA

res <- results(dds, contrast=c("Condition","Andr_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 362, 2.4%
## LFC < 0 (down)     : 439, 3%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

10.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Andr_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 363, 2.4%
## LFC < 0 (down)     : 439, 3%
## outliers [1]       : 2, 0.013%
## low counts [2]     : 0, 0%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Andr_Ag vs DMSO 
## Wald test p-value: Condition Andr_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## GPR83   111.3070       2.228434  0.344040 5.32591e-12 7.90790e-08
## ETV4    551.7481       1.680794  0.375091 2.37707e-08 1.17649e-04
## H1-2   2271.6009      -1.888883  0.407034 2.26831e-08 1.17649e-04
## CCL25    27.5643      -1.732509  0.588293 7.62259e-07 2.82951e-03
## SPRED3 1209.1816       1.067059  0.446071 1.17270e-06 3.48245e-03
## SHC3   3546.0285       0.965898  0.426251 1.63570e-06 4.04781e-03
  • Genes modulated considering a threshold of 0.01 on the FDR: 14
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 13
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 10

10.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|Andr', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

10.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

10.2 Androgen Inhibitor vs DMSO

10.2.1 DEA

res <- results(dds, contrast=c("Condition","Andr_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 79, 0.53%
## LFC < 0 (down)     : 98, 0.66%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

10.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Andr_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 79, 0.53%
## LFC < 0 (down)     : 98, 0.66%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Andr_Inh vs DMSO 
## Wald test p-value: Condition Andr_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE      pvalue        padj
##         <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## IQCN     683.7079      -1.191142  0.204636 2.29537e-10 2.86883e-06
## H2BC5   2477.4868      -1.356287  0.234839 3.86895e-10 2.86883e-06
## SLFNL1    92.2611      -1.382435  0.290498 1.81433e-08 8.96882e-05
## CHORDC1 2217.8619      -0.810019  0.244348 6.46333e-08 2.39628e-04
## RNF31   1588.8192       0.490045  0.140362 1.01109e-07 2.99889e-04
## TRPM4   1093.5564       1.102341  0.302760 1.74719e-07 4.31848e-04
  • Genes modulated considering a threshold of 0.01 on the FDR: 27
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 16
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 9

10.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

10.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

10.3 Androgen Agonist vs Inhibitor

10.3.1 DEA

res <- results(dds, contrast=c("Condition","Andr_Ag", "Andr_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 155, 1%
## LFC < 0 (down)     : 163, 1.1%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

10.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "Andr_Ag", "Andr_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 155, 1%
## LFC < 0 (down)     : 163, 1.1%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition Andr_Ag vs Andr_Inh 
## Wald test p-value: Condition Andr_Ag vs Andr_Inh 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## TRPM4   1093.556       -1.93622  0.266926 1.45726e-14 2.16112e-10
## GPR83    111.307        2.82382  0.528999 2.08634e-11 1.54702e-07
## DOCK6   1069.304       -1.54904  0.265794 4.70595e-10 2.32631e-06
## BCKDHA  1955.028       -1.09020  0.234496 1.82864e-08 6.77968e-05
## CDKN1A 10791.282       -1.77197  0.360168 2.53317e-08 7.51337e-05
## DOK3     216.497        1.80667  0.372697 3.12047e-08 7.71275e-05
  • Genes modulated considering a threshold of 0.01 on the FDR: 30
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 26
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 23

10.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

10.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

11. Glucocorticoid treatment

11.1 Glucocorticoid Agonist vs DMSO

Path <- Treat[6]
Path
## [1] "GC"

11.1.1 DEA

res <- results(dds, contrast=c("Condition","GC_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 25, 0.17%
## LFC < 0 (down)     : 5, 0.034%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

11.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "GC_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 25, 0.17%
## LFC < 0 (down)     : 5, 0.034%
## outliers [1]       : 2, 0.013%
## low counts [2]     : 0, 0%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition GC_Ag vs DMSO 
## Wald test p-value: Condition GC_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##             baseMean log2FoldChange     lfcSE      pvalue        padj
##            <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## AC009412.1  830.1102      -26.03516  2.795100 7.38515e-21 1.09655e-16
## FKBP5       858.7280        3.13529  0.405841 1.38188e-15 1.02591e-11
## PNMT         58.7006        2.73417  0.445730 1.75974e-10 8.70955e-07
## TSC22D3    2443.7675        1.34974  0.219157 5.08873e-10 1.88894e-06
## NIBAN1      366.3420        1.85523  0.335001 1.28294e-08 3.80980e-05
## EDARADD      51.3034        2.33912  0.443467 2.88220e-08 7.13249e-05
  • Genes modulated considering a threshold of 0.01 on the FDR: 10
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 9
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 9

11.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|GC', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

11.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

11.2 Glucocorticoid Inhibitor vs DMSO

11.2.1 DEA

res <- results(dds, contrast=c("Condition","GC_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 977, 6.6%
## LFC < 0 (down)     : 1850, 12%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

11.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "GC_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 977, 6.6%
## LFC < 0 (down)     : 1850, 12%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition GC_Inh vs DMSO 
## Wald test p-value: Condition GC_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE      pvalue        padj
##         <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## H2BC5    2477.487       2.196000  0.269368 2.76292e-17 4.09741e-13
## UROS     1190.069      -1.796019  0.256517 8.79793e-14 5.50278e-10
## H2BC8     546.972       2.384224  0.340368 1.11317e-13 5.50278e-10
## FBLN5    1094.935      -3.849070  0.598538 1.91004e-13 7.08147e-10
## CYP51A1  7039.640       0.881434  0.132817 3.14645e-13 9.33237e-10
## GSTZ1     363.606      -1.795115  0.269785 8.22470e-13 1.38159e-09
  • Genes modulated considering a threshold of 0.01 on the FDR: 815
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 712
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 351

11.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

11.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

11.3 Glucocorticoid Agonist vs Inhibitor

11.3.1 DEA

res <- results(dds, contrast=c("Condition","GC_Ag", "GC_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1215, 8.2%
## LFC < 0 (down)     : 773, 5.2%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

11.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "GC_Ag", "GC_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1215, 8.2%
## LFC < 0 (down)     : 773, 5.2%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition GC_Ag vs GC_Inh 
## Wald test p-value: Condition GC_Ag vs GC_Inh 
## DataFrame with 6 rows and 5 columns
##          baseMean log2FoldChange     lfcSE      pvalue        padj
##         <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## H2BC5    2477.487       -2.72836  0.326425 8.33201e-18 1.23564e-13
## CHORDC1  2217.862       -1.79622  0.249883 3.81439e-14 2.82837e-10
## H2AC11    288.391       -2.30373  0.320512 1.19091e-13 5.88707e-10
## ATP1A2   8508.920        3.68339  0.543133 2.04929e-13 7.59773e-10
## CCDC117  2414.585       -1.52778  0.246273 1.81079e-12 5.37079e-09
## H2BC8     546.972       -2.67762  0.409089 7.11951e-12 1.75970e-08
  • Genes modulated considering a threshold of 0.01 on the FDR: 487
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 464
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 300

11.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

11.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

12. Arhyl Hydrocarbon treatment

12.1 Arhyl Hydrocarbon Agonist vs DMSO

Path <- Treat[7]
Path
## [1] "AhHyd"

12.1.1 DEA

res <- results(dds, contrast=c("Condition","AhHyd_Ag", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 70, 0.47%
## LFC < 0 (down)     : 426, 2.9%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

12.1.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "AhHyd_Ag", "DMSO"),  type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 70, 0.47%
## LFC < 0 (down)     : 427, 2.9%
## outliers [1]       : 2, 0.013%
## low counts [2]     : 0, 0%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition AhHyd_Ag vs DMSO 
## Wald test p-value: Condition AhHyd_Ag vs DMSO 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## UTP14C  714.8678      -20.28483  1.632970 4.82229e-36 7.16013e-32
## H2BC5  2477.4868       -1.71901  0.221179 2.02456e-16 1.01714e-12
## HSPA6   653.5852       -5.77396  0.718980 2.05510e-16 1.01714e-12
## H2BC21 4323.4556       -1.21541  0.163588 1.80547e-14 6.70190e-11
## H1-2   2271.6009       -2.42912  0.446741 1.92163e-11 5.70648e-08
## ADGRF3   58.9674       -1.75087  0.327039 1.62849e-09 4.02998e-06
  • Genes modulated considering a threshold of 0.01 on the FDR: 83
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 76
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 61

12.1.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

S <- grepl('DMSO|AhHyd', colnames(SE_DEA))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

12.1.4 Store Results

DEA_Res[[Path]]$Agonist$Res <- Res
DEA_Res[[Path]]$Agonist$DEGs <- DEGs

rm(Res, DEGs)

12.2 Arhyl Hydrocarbon Inhibitor vs DMSO

12.2.1 DEA

res <- results(dds, contrast=c("Condition","AhHyd_Inh", "DMSO"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1312, 8.8%
## LFC < 0 (down)     : 1756, 12%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

12.2.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "AhHyd_Inh", "DMSO"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1312, 8.8%
## LFC < 0 (down)     : 1756, 12%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition AhHyd_Inh vs DMSO 
## Wald test p-value: Condition AhHyd_Inh vs DMSO 
## DataFrame with 6 rows and 5 columns
##            baseMean log2FoldChange     lfcSE      pvalue        padj
##           <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## SAPCD1      92.7142      -4.330887  0.555055 4.31588e-17 6.40045e-13
## SREBF1    2029.5477      -1.447524  0.218664 3.17663e-16 2.35547e-12
## CDC42BPB 13404.2539       0.626203  0.100081 6.30106e-11 3.11482e-07
## ALDOC     2144.5707      -2.382038  0.534933 1.32015e-10 4.89446e-07
## RAPGEF1   7736.8248       1.007593  0.190124 8.29152e-10 2.45926e-06
## CYFIP2   14784.2824       0.682792  0.120622 1.65288e-09 3.50175e-06
  • Genes modulated considering a threshold of 0.01 on the FDR: 716
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 550
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 42

12.2.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

12.2.4 Store Results

DEA_Res[[Path]]$Inhibitor$Res <- Res
DEA_Res[[Path]]$Inhibitor$DEGs <- DEGs

rm(Res, DEGs)

12.3 Arhyl Hydrocarbon Agonist vs Inhibitor

12.3.1 DEA

res <- results(dds, contrast=c("Condition","AhHyd_Ag", "AhHyd_Inh"), alpha=0.01, cooksCutoff=0.90,  independentFiltering=FALSE)
summary(res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 969, 6.5%
## LFC < 0 (down)     : 1510, 10%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

12.3.2 Fold-change shrinkage

Res <- lfcShrink(dds, contrast=c("Condition", "AhHyd_Ag", "AhHyd_Inh"), res=res, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
##     Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
##     https://doi.org/10.1093/biostatistics/kxw041
  
summary(Res)
## 
## out of 14850 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 969, 6.5%
## LFC < 0 (down)     : 1510, 10%
## outliers [1]       : 20, 0.13%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
rm(res)

head(Res[order(Res$padj),])
## log2 fold change (MMSE): Condition AhHyd_Ag vs AhHyd_Inh 
## Wald test p-value: Condition AhHyd_Ag vs AhHyd_Inh 
## DataFrame with 6 rows and 5 columns
##         baseMean log2FoldChange     lfcSE      pvalue        padj
##        <numeric>      <numeric> <numeric>   <numeric>   <numeric>
## UTP14C  714.8678      -20.37451  1.937325 2.65860e-26 3.94270e-22
## H2BC5  2477.4868       -2.12683  0.301982 2.24445e-14 1.66426e-10
## H2BC8   546.9718       -2.38897  0.383283 1.86661e-12 9.22726e-09
## SAPCD1   92.7142        3.81988  0.789917 2.76534e-12 1.02525e-08
## H2BC7    87.6608       -2.43042  0.432574 4.85454e-11 1.43986e-07
## HEXA   2263.1440        1.30116  0.235179 1.17491e-10 2.90397e-07
  • Genes modulated considering a threshold of 0.01 on the FDR: 546
  • Genes modulated considering a threshold of 0.01 on the FDR and of 1.5 on the FC: 508
  • Genes modulated considering a threshold of 0.01 on the FDR and of 2 on the FC: 286

12.3.3 Visualization

Heatmap selected samples

DEGs <- dplyr::filter(data.frame(Res), padj < 0.01, abs(log2FoldChange)>log2(2))

ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')
sechm(SE_DEA[,S], features=row.names(DEGs[order(DEGs$padj), ]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

Heatmap all samples

sechm(SE_DEA, features=row.names(DEGs[order(DEGs$padj), ][1:1000,]), assayName="vst", gaps_at="Condition",
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols,
      do.scale=TRUE, breaks=0.8)

MA Plot

plotMA(Res, ylim=c(-4,4))

Counts

par(mfrow=c(3,4))

#grepl(Path, colnames(SE_DEA))

for(i in 1:12){
  plotCounts(dds[, grepl(Path, colnames(SE_DEA))], gene=row.names(Res[order(Res$padj), ])[i], intgroup="Condition")
}

12.3.4 Store Results

DEA_Res[[Path]]$AgvsInh$Res <- Res
DEA_Res[[Path]]$AgvsInh$DEGs <- DEGs

rm(Res, DEGs)

13. Savings

saveRDS(DEA_Res, paste0(OutputFolder, '/', 'DEARes.rds')) 
saveRDS(SE_DEA, paste0(OutputFolder, '/', 'SE_DEA.rds')) 
SessionInfo <- sessionInfo()
Date <- date()
save.image(paste0(OutputFolder, '/', 'DEA.RData'))
Date
## [1] "Fri Jul 18 14:11:48 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ashr_2.2-54                 sechm_1.6.0                
##  [3] ggplot2_3.4.1               DESeq2_1.38.3              
##  [5] SummarizedExperiment_1.28.0 Biobase_2.58.0             
##  [7] MatrixGenerics_1.10.0       matrixStats_0.63.0         
##  [9] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [11] IRanges_2.32.0              S4Vectors_0.36.1           
## [13] BiocGenerics_0.44.0        
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7           bit64_4.0.5            doParallel_1.0.17     
##  [4] RColorBrewer_1.1-3     httr_1.4.5             tools_4.2.1           
##  [7] bslib_0.4.2            irlba_2.3.5.1          utf8_1.2.3            
## [10] R6_2.5.1               DBI_1.1.3              colorspace_2.1-0      
## [13] GetoptLong_1.0.5       withr_2.5.0            tidyselect_1.2.0      
## [16] bit_4.0.5              curl_5.0.0             compiler_4.2.1        
## [19] cli_3.6.1              TSP_1.2-2              DelayedArray_0.24.0   
## [22] labeling_0.4.2         sass_0.4.5             scales_1.2.1          
## [25] SQUAREM_2021.1         mixsqp_0.3-48          stringr_1.5.0         
## [28] digest_0.6.31          rmarkdown_2.20         ca_0.71.1             
## [31] XVector_0.38.0         pkgconfig_2.0.3        htmltools_0.5.4       
## [34] highr_0.10             invgamma_1.1           fastmap_1.1.1         
## [37] rlang_1.1.1            GlobalOptions_0.1.2    rstudioapi_0.14       
## [40] RSQLite_2.3.0          farver_2.1.1           shape_1.4.6           
## [43] jquerylib_0.1.4        generics_0.1.3         jsonlite_1.8.4        
## [46] BiocParallel_1.32.5    dplyr_1.1.0            RCurl_1.98-1.10       
## [49] magrittr_2.0.3         GenomeInfoDbData_1.2.9 Matrix_1.5-3          
## [52] Rcpp_1.0.10            munsell_0.5.0          fansi_1.0.4           
## [55] lifecycle_1.0.3        stringi_1.7.12         yaml_2.3.7            
## [58] zlibbioc_1.44.0        Rtsne_0.16             grid_4.2.1            
## [61] blob_1.2.3             parallel_4.2.1         randomcoloR_1.1.0.1   
## [64] crayon_1.5.2           lattice_0.20-45        Biostrings_2.66.0     
## [67] annotate_1.76.0        circlize_0.4.15        KEGGREST_1.38.0       
## [70] locfit_1.5-9.7         knitr_1.42             ComplexHeatmap_2.14.0 
## [73] pillar_1.8.1           rjson_0.2.21           geneplotter_1.76.0    
## [76] codetools_0.2-19       XML_3.99-0.13          glue_1.6.2            
## [79] evaluate_0.20          V8_4.2.2               png_0.1-8             
## [82] vctrs_0.6.2            foreach_1.5.2          gtable_0.3.1          
## [85] clue_0.3-64            cachem_1.0.7           xfun_0.37             
## [88] xtable_1.8-4           truncnorm_1.0-8        seriation_1.4.1       
## [91] tibble_3.2.1           iterators_1.0.14       registry_0.5-1        
## [94] AnnotationDbi_1.60.0   memoise_2.0.1          cluster_2.1.4