suppressPackageStartupMessages({
  library(SummarizedExperiment)
  library(SEtools)   
  library(edgeR)
  library(DT)
  library(pheatmap)
  library(plotly)
  library(dplyr)
  library(sva)
})

## Warning: replacing previous import 'ComplexHeatmap::pheatmap' by
## 'pheatmap::pheatmap' when loading 'SEtools'

load("../Data/geneLengths.RData")
source("../Functions/EDC_Functions.R")
load("../Data/TotalSumExp.RData",verbose=T)

## Loading objects:
##   Total

MixN

# subset to chronic data
se <- Total[,which(Total$EXPOduration %in% c("Chronic 15 days","Chronic50days"))]
# refactor EXPO to have DMSO as the base factor, and the other levels in a decent order
se$EXPO <- factor(as.character(se$EXPO),levels=c("DMSO","CNT","1X","1000X","BPA0.04X","T3","VITC"))
se$EXPO2 <- se$EXPO
# we will consider CNT and DMSO together; comment out following to use DMSO as baseline
levels(se$EXPO) <- c("CNT","CNT","1X","1000X","BPA0.04X","T3","VITC")
# we treat the batch as factor and use 2018 as the base level because it has the most samples
se$MixtureBatch <- relevel(as.factor(se$MixtureBatch),"2018")
se <- se[,order(se$type, se$EXPO, se$MixtureBatch)]

# we filter and normalize the two datasets separately:
org <- filterGenes(se[,which(se$EXPOduration=="Chronic50days")])

## 9986 rows out of 26300 were removed.

dds.org <- calcNormFactors(DGEList(assay(org)))
assays(org)$logcpm <- log(cpm(dds.org)+0.1)

fe <- filterGenes(se[,which(se$EXPOduration=="Chronic 15 days")])

## 12283 rows out of 26300 were removed.

dds.fe <- calcNormFactors(DGEList(assay(fe)))
assays(fe)$logcpm <- log(cpm(dds.fe)+0.1)

SVA

Organoids chronic

# performs the SVA analysis
sv <- svacor(org, ~MixtureBatch+EXPO, ~MixtureBatch,regressOutNull = T)

## Using variance-stabilizing transformation

## converting counts to integer mode

## Number of significant surrogate variables is:  6 
## Iteration (out of 5 ):1  2  3  4  5

# we put the corrected data in the total SE object 
assays(org)$corrected <- sv$cor

# we include the SVs in the object's metadata
colData(org) <- cbind(colData(org),sv$sv)

#DEA
fit <- glmQLFit(estimateDisp(dds.org,sv$mm),sv$mm)
res.org <- as.data.frame(topTags(glmQLFTest(fit,c("EXPO1X","EXPO1000X")),n=Inf))

Fetal chronic

# performs the SVA analysis
sv <- svacor(fe, ~MixtureBatch+EXPO, ~MixtureBatch,regressOutNull = T)

## Using variance-stabilizing transformation

## converting counts to integer mode

## Number of significant surrogate variables is:  6 
## Iteration (out of 5 ):1  2  3  4  5

# we put the corrected data in the total SE object 
assays(fe)$corrected <- sv$cor

# we include the SVs in the object's metadata
colData(fe) <- cbind(colData(fe),sv$sv)

#DEA
fit <- glmQLFit(estimateDisp(dds.fe,sv$mm),sv$mm)
res.fe <- as.data.frame(topTags(glmQLFTest(fit,c("EXPO1X","EXPO1000X")),n=Inf))

Fetal acute

# we prepare the acute fetal dataset
se2.fe <- filterGenes(Total[,which(Total$BrainArea=="Cortex" & Total$Mix=="MixN" & Total$EXPOduration=="Acute 48 hours")])

## 11930 rows out of 26300 were removed.

se2.fe$EXPO <- factor(as.character(se2.fe$EXPO),levels=c("DMSO","CNT","0.1X","1X","10X","100X","1000X","BPA0.04X","BPA1X","T3","T3MixN","VITC"))
se2.fe$EXPO2 <- se2.fe$EXPO
levels(se2.fe$EXPO) <- c("CNT","CNT","0.1X","1X","10X","100X","1000X","BPA0.04X","BPA1X","T3","T3MixN","VITC")
se2.fe <- se2.fe[,order(se2.fe$EXPO)]
se2.fe$YearOfExperiment <- factor(as.character(se2.fe$YearOfExperiment), levels = c("2016","2018","2019"))
se2.fe$Line <- factor(as.character(se2.fe$Line), levels = c("E3381-1","E3361-1"))
#correction
sv <- svacor(se2.fe, ~YearOfExperiment+Line+EXPO,~YearOfExperiment+Line,regressOutNull = T)

## Using variance-stabilizing transformation

## converting counts to integer mode

##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

## Number of significant surrogate variables is:  14 
## Iteration (out of 5 ):1  2  3  4  5

# We add normalized data to the object
assays(se2.fe)$logcpm <- log(cpm(calcNormFactors(DGEList(assay(se2.fe))))+0.1)


# we include the corrected data in the object
assays(se2.fe)$corrected <- sv$cor
#DEA
dds.fe2 <- calcNormFactors(DGEList(assay(se2.fe)))
fit <- glmQLFit(estimateDisp(dds.fe2, sv$mm),sv$mm)
res.fe2 <- as.data.frame(topTags(glmQLFTest(fit,c("EXPO0.1X","EXPO1X","EXPO10X","EXPO100X","EXPO1000X")),n=Inf))

# we then merge all SEs
SEs <- list(chronic.org=org, chronic.fetal=fe, acute.fetal=se2.fe)
DEAs <- list(chronic.org=res.org, chronic.fetal=res.fe, acute.fetal=res.fe2)

#save(SEs, DEAs, file = "../Data/AllSEcorrected.RData")

Index

MixN_Acute

MixN_ChronicFetal

FunctionalFetalChronicMixN

MixN_ChronicOrganoids

FunctionalOrganoidChronicMixN

MixNVsSingleCompounds

MasterRegulatorAnalysis

Authors

Nicolò Caporale: nicolo.caporale@ieo.it, nicolo.caporale@unimi.it

Cristina Cheroni: cristina.cheroni@ieo.it

Pierre-Luc Germain: pierre-luc.germain@ieo.it

Giuseppe Testa: giuseppe.testa@ieo.it

Lab: http://www.testalab.eu/

‘Date: December 13, 2021’

sessionInfo()

## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] DESeq2_1.30.1               sva_3.38.0                 
##  [3] BiocParallel_1.24.1         genefilter_1.72.1          
##  [5] mgcv_1.8-34                 nlme_3.1-152               
##  [7] dplyr_1.0.5                 plotly_4.9.3               
##  [9] ggplot2_3.3.3               pheatmap_1.0.12            
## [11] DT_0.17                     edgeR_3.32.1               
## [13] limma_3.46.0                SEtools_1.4.0              
## [15] SummarizedExperiment_1.20.0 Biobase_2.50.0             
## [17] GenomicRanges_1.42.0        GenomeInfoDb_1.26.2        
## [19] IRanges_2.24.1              S4Vectors_0.28.1           
## [21] BiocGenerics_0.36.0         MatrixGenerics_1.2.1       
## [23] matrixStats_0.58.0         
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.15             colorspace_2.0-0       rjson_0.2.20          
##  [4] ellipsis_0.3.1         circlize_0.4.12        XVector_0.30.0        
##  [7] GlobalOptions_0.1.2    clue_0.3-58            bit64_4.0.5           
## [10] AnnotationDbi_1.52.0   fansi_0.4.2            codetools_0.2-18      
## [13] splines_4.0.3          cachem_1.0.4           geneplotter_1.68.0    
## [16] knitr_1.31             jsonlite_1.7.2         Cairo_1.5-12.2        
## [19] annotate_1.68.0        cluster_2.1.1          png_0.1-7             
## [22] compiler_4.0.3         httr_1.4.2             assertthat_0.2.1      
## [25] Matrix_1.3-2           fastmap_1.1.0          lazyeval_0.2.2        
## [28] htmltools_0.5.1.1      tools_4.0.3            gtable_0.3.0          
## [31] glue_1.4.2             GenomeInfoDbData_1.2.4 V8_3.4.0              
## [34] Rcpp_1.0.6             jquerylib_0.1.3        vctrs_0.3.7           
## [37] iterators_1.0.13       xfun_0.21              stringr_1.4.0         
## [40] openxlsx_4.2.3         lifecycle_1.0.0        XML_3.99-0.5          
## [43] zlibbioc_1.36.0        scales_1.1.1           TSP_1.1-10            
## [46] RColorBrewer_1.1-2     ComplexHeatmap_2.6.2   yaml_2.2.1            
## [49] curl_4.3               memoise_2.0.0          sass_0.3.1            
## [52] stringi_1.5.3          RSQLite_2.2.3          randomcoloR_1.1.0.1   
## [55] foreach_1.5.1          seriation_1.2-9        zip_2.1.1             
## [58] shape_1.4.5            rlang_0.4.10           pkgconfig_2.0.3       
## [61] bitops_1.0-6           evaluate_0.14          lattice_0.20-41       
## [64] purrr_0.3.4            htmlwidgets_1.5.3      bit_4.0.4             
## [67] tidyselect_1.1.0       magrittr_2.0.1         R6_2.5.0              
## [70] generics_0.1.0         DelayedArray_0.16.1    DBI_1.1.1             
## [73] pillar_1.5.1           withr_2.4.1            survival_3.2-7        
## [76] RCurl_1.98-1.2         tibble_3.1.0           crayon_1.4.1          
## [79] utf8_1.2.1             rmarkdown_2.7          GetoptLong_1.0.5      
## [82] locfit_1.5-9.4         grid_4.0.3             data.table_1.14.0     
## [85] blob_1.2.1             digest_0.6.27          xtable_1.8-4          
## [88] tidyr_1.1.3            munsell_0.5.0          registry_0.5-1        
## [91] viridisLite_0.3.0      bslib_0.2.4

Surrogate Variable Analysis, Batch correction and Differential Expression Analysis

MixN

SVA

Organoids chronic

Fetal chronic

Fetal acute

Index

Authors