# subset to chronic data
se <- Total[,which(Total$EXPOduration %in% c("Chronic 15 days","Chronic50days"))]
# refactor EXPO to have DMSO as the base factor, and the other levels in a decent order
se$EXPO <- factor(as.character(se$EXPO),levels=c("DMSO","CNT","1X","1000X","BPA0.04X","T3","VITC"))
se$EXPO2 <- se$EXPO
# we will consider CNT and DMSO together; comment out following to use DMSO as baseline
levels(se$EXPO) <- c("CNT","CNT","1X","1000X","BPA0.04X","T3","VITC")
# we treat the batch as factor and use 2018 as the base level because it has the most samples
se$MixtureBatch <- relevel(as.factor(se$MixtureBatch),"2018")
se <- se[,order(se$type, se$EXPO, se$MixtureBatch)]
# we filter and normalize the two datasets separately:
org <- filterGenes(se[,which(se$EXPOduration=="Chronic50days")])
Organoids chronic
# performs the SVA analysis
sv <- svacor(org, ~MixtureBatch+EXPO, ~MixtureBatch,regressOutNull = T)
## Using variance-stabilizing transformation
## converting counts to integer mode
## Number of significant surrogate variables is: 6
## Iteration (out of 5 ):1 2 3 4 5
# we put the corrected data in the total SE object
assays(org)$corrected <- sv$cor
# we include the SVs in the object's metadata
colData(org) <- cbind(colData(org),sv$sv)
#DEA
fit <- glmQLFit(estimateDisp(dds.org,sv$mm),sv$mm)
res.org <- as.data.frame(topTags(glmQLFTest(fit,c("EXPO1X","EXPO1000X")),n=Inf))
Fetal chronic
# performs the SVA analysis
sv <- svacor(fe, ~MixtureBatch+EXPO, ~MixtureBatch,regressOutNull = T)
## Using variance-stabilizing transformation
## converting counts to integer mode
## Number of significant surrogate variables is: 6
## Iteration (out of 5 ):1 2 3 4 5
# we put the corrected data in the total SE object
assays(fe)$corrected <- sv$cor
# we include the SVs in the object's metadata
colData(fe) <- cbind(colData(fe),sv$sv)
#DEA
fit <- glmQLFit(estimateDisp(dds.fe,sv$mm),sv$mm)
res.fe <- as.data.frame(topTags(glmQLFTest(fit,c("EXPO1X","EXPO1000X")),n=Inf))
Fetal acute
# we prepare the acute fetal dataset
se2.fe <- filterGenes(Total[,which(Total$BrainArea=="Cortex" & Total$Mix=="MixN" & Total$EXPOduration=="Acute 48 hours")])
## 11930 rows out of 26300 were removed.
se2.fe$EXPO <- factor(as.character(se2.fe$EXPO),levels=c("DMSO","CNT","0.1X","1X","10X","100X","1000X","BPA0.04X","BPA1X","T3","T3MixN","VITC"))
se2.fe$EXPO2 <- se2.fe$EXPO
levels(se2.fe$EXPO) <- c("CNT","CNT","0.1X","1X","10X","100X","1000X","BPA0.04X","BPA1X","T3","T3MixN","VITC")
se2.fe <- se2.fe[,order(se2.fe$EXPO)]
se2.fe$YearOfExperiment <- factor(as.character(se2.fe$YearOfExperiment), levels = c("2016","2018","2019"))
se2.fe$Line <- factor(as.character(se2.fe$Line), levels = c("E3381-1","E3361-1"))
#correction
sv <- svacor(se2.fe, ~YearOfExperiment+Line+EXPO,~YearOfExperiment+Line,regressOutNull = T)
## Using variance-stabilizing transformation
## converting counts to integer mode
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## Number of significant surrogate variables is: 14
## Iteration (out of 5 ):1 2 3 4 5
# We add normalized data to the object
assays(se2.fe)$logcpm <- log(cpm(calcNormFactors(DGEList(assay(se2.fe))))+0.1)
# we include the corrected data in the object
assays(se2.fe)$corrected <- sv$cor
#DEA
dds.fe2 <- calcNormFactors(DGEList(assay(se2.fe)))
fit <- glmQLFit(estimateDisp(dds.fe2, sv$mm),sv$mm)
res.fe2 <- as.data.frame(topTags(glmQLFTest(fit,c("EXPO0.1X","EXPO1X","EXPO10X","EXPO100X","EXPO1000X")),n=Inf))
# we then merge all SEs
SEs <- list(chronic.org=org, chronic.fetal=fe, acute.fetal=se2.fe)
DEAs <- list(chronic.org=res.org, chronic.fetal=res.fe, acute.fetal=res.fe2)
#save(SEs, DEAs, file = "../Data/AllSEcorrected.RData")
Authors
Nicolò Caporale: nicolo.caporale@ieo.it, nicolo.caporale@unimi.it
Cristina Cheroni: cristina.cheroni@ieo.it
Pierre-Luc Germain: pierre-luc.germain@ieo.it
Giuseppe Testa: giuseppe.testa@ieo.it
Lab: http://www.testalab.eu/
‘Date: December 13, 2021’
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] DESeq2_1.30.1 sva_3.38.0
## [3] BiocParallel_1.24.1 genefilter_1.72.1
## [5] mgcv_1.8-34 nlme_3.1-152
## [7] dplyr_1.0.5 plotly_4.9.3
## [9] ggplot2_3.3.3 pheatmap_1.0.12
## [11] DT_0.17 edgeR_3.32.1
## [13] limma_3.46.0 SEtools_1.4.0
## [15] SummarizedExperiment_1.20.0 Biobase_2.50.0
## [17] GenomicRanges_1.42.0 GenomeInfoDb_1.26.2
## [19] IRanges_2.24.1 S4Vectors_0.28.1
## [21] BiocGenerics_0.36.0 MatrixGenerics_1.2.1
## [23] matrixStats_0.58.0
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-0 rjson_0.2.20
## [4] ellipsis_0.3.1 circlize_0.4.12 XVector_0.30.0
## [7] GlobalOptions_0.1.2 clue_0.3-58 bit64_4.0.5
## [10] AnnotationDbi_1.52.0 fansi_0.4.2 codetools_0.2-18
## [13] splines_4.0.3 cachem_1.0.4 geneplotter_1.68.0
## [16] knitr_1.31 jsonlite_1.7.2 Cairo_1.5-12.2
## [19] annotate_1.68.0 cluster_2.1.1 png_0.1-7
## [22] compiler_4.0.3 httr_1.4.2 assertthat_0.2.1
## [25] Matrix_1.3-2 fastmap_1.1.0 lazyeval_0.2.2
## [28] htmltools_0.5.1.1 tools_4.0.3 gtable_0.3.0
## [31] glue_1.4.2 GenomeInfoDbData_1.2.4 V8_3.4.0
## [34] Rcpp_1.0.6 jquerylib_0.1.3 vctrs_0.3.7
## [37] iterators_1.0.13 xfun_0.21 stringr_1.4.0
## [40] openxlsx_4.2.3 lifecycle_1.0.0 XML_3.99-0.5
## [43] zlibbioc_1.36.0 scales_1.1.1 TSP_1.1-10
## [46] RColorBrewer_1.1-2 ComplexHeatmap_2.6.2 yaml_2.2.1
## [49] curl_4.3 memoise_2.0.0 sass_0.3.1
## [52] stringi_1.5.3 RSQLite_2.2.3 randomcoloR_1.1.0.1
## [55] foreach_1.5.1 seriation_1.2-9 zip_2.1.1
## [58] shape_1.4.5 rlang_0.4.10 pkgconfig_2.0.3
## [61] bitops_1.0-6 evaluate_0.14 lattice_0.20-41
## [64] purrr_0.3.4 htmlwidgets_1.5.3 bit_4.0.4
## [67] tidyselect_1.1.0 magrittr_2.0.1 R6_2.5.0
## [70] generics_0.1.0 DelayedArray_0.16.1 DBI_1.1.1
## [73] pillar_1.5.1 withr_2.4.1 survival_3.2-7
## [76] RCurl_1.98-1.2 tibble_3.1.0 crayon_1.4.1
## [79] utf8_1.2.1 rmarkdown_2.7 GetoptLong_1.0.5
## [82] locfit_1.5-9.4 grid_4.0.3 data.table_1.14.0
## [85] blob_1.2.1 digest_0.6.27 xtable_1.8-4
## [88] tidyr_1.1.3 munsell_0.5.0 registry_0.5-1
## [91] viridisLite_0.3.0 bslib_0.2.4