Vignettes:
N.B. delta=0.25, p.threshold=0.0001
library(DSS)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: BiocParallel
## Loading required package: bsseq
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: parallel
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(scales)
source("../Methylation_helper.R")
<- params$InputFolder
InputFolder <- params$OutputFolder OutputFolder
Loading of bsseq object with CpGs shared by at least 75% of samples.
<- readRDS("~/DataDir/2.DifferentialAnalysis/Input/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed bsseq_obj
PCA plot
This PCA is done on the new filtered BSseq object (different from the one in the preliminary exploration script!)
<- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00") cellTypes_colors
<- do_PCA(getMeth(BSseq = bsseq_obj, type = "raw"))
pca_res plot_PCA(pca_res = pca_res, anno = pData(bsseq_obj), col_anno = "Type", shape_anno = NULL, custom_colors = cellTypes_colors, point_size = 5)
plot_PCA(pca_res = pca_res, anno = pData(bsseq_obj), col_anno = "Type", shape_anno = NULL, pc_x = "PC2", pc_y = "PC3", custom_colors = cellTypes_colors, point_size = 5)
Loading results obtained with DSS::DMLtest
function
<- list.files(path = InputFolder, pattern = '*\\.rds', full.names = TRUE)
dml_tests_files <- dml_tests_files[-1] dml_tests_files
Storing DMRs and DMLs Plotting number of DMRs
<- length(dml_tests_files)
nr_tests
<- list()
DM_tests_Res <- list()
DMR_Res <- list()
DML_Res
for(i in 1:nr_tests){
<- readRDS(dml_tests_files[i])
DM_tests_Res[[i]] <- list()
DMR_Res[[i]] <- list()
DML_Res[[i]]
names(DM_tests_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
names(DMR_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
names(DML_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
<- callDMR(DM_tests_Res[[i]], delta=0.25, p.threshold=0.0001)
DMR_Res[[i]] <- callDML(DM_tests_Res[[i]], delta=0.25, p.threshold=0.0001)
DML_Res[[i]]
}
DMRbarplot(DMR=DMR_Res, comparison=1, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=1)
DMRbarplot(DMR=DMR_Res, comparison=3, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=3)
DMRbarplot(DMR=DMR_Res, comparison=4, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=4)
DMRbarplot(DMR=DMR_Res, comparison=5, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=5)
DMRbarplot(DMR=DMR_Res, comparison=6, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=6)
DMRbarplot(DMR=DMR_Res, comparison=7, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=7)
Barplot with all together
<- c("hiPSCsvsiMeLCs", "iMeLCsvshPGCLCs", "hiPSCsvshPGCLCs", "hEGCLCsvshPGCLCs", "hEGCLCsvsiMeLCs", "hEGCLCsvshiPSCs") #ordered with criteria
comparisons
<- c("hiPSCsvsiMeLCs" = "iMeLCs vs hiPSCs",
new_ylabel "iMeLCsvshPGCLCs" = "hPGCLCs vs iMeLCs",
"hiPSCsvshPGCLCs" = "hPGCLCs vs hiPSCs",
"hEGCLCsvshPGCLCs" = "hPGCLCs vs hEGCLCs",
"hEGCLCsvsiMeLCs" = "iMeLCs vs hEGCLCs",
"hEGCLCsvshiPSCs" = "hiPSCs vs hEGCLCs")
DMRbarplot_all(DMR=DMR_Res, comparisons_ordered=comparisons, label_y=new_ylabel, y_lim = 2400, interactive=TRUE)
N.B: Q: Why doesn’t callDMR function return FDR for identified DMRs?
A: In DMR calling, the statistical test is perform for each CpG, and then the significant CpG are merged into regions. It is very difficult to estimate FDR at the region-level, based on the site level test results (p-values or FDR). This is a difficult question for all types of genome-wide assays such as the ChIP-seq. So I rather not to report a DMR-level FDR if it’s not accurate.
saveRDS(DM_tests_Res, paste0(InputFolder, '/', 'DML_test_results.rds'))
saveRDS(DMR_Res, paste0(OutputFolder, '/', 'DMRs.rds'))
saveRDS(DML_Res, paste0(OutputFolder, '/', 'DMLs.rds'))
<- sessionInfo()
SessionInfo <- date() Date
Date
## [1] "Thu Jan 9 18:10:07 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] scales_1.2.1 plotly_4.10.2
## [3] ggplot2_3.4.2 DSS_2.46.0
## [5] bsseq_1.34.0 SummarizedExperiment_1.28.0
## [7] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [9] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
## [11] IRanges_2.32.0 S4Vectors_0.36.2
## [13] BiocParallel_1.32.6 Biobase_2.58.0
## [15] BiocGenerics_0.44.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.6 tidyr_1.3.0
## [3] sass_0.4.7 viridisLite_0.4.2
## [5] jsonlite_1.8.7 splines_4.2.1
## [7] DelayedMatrixStats_1.20.0 R.utils_2.12.2
## [9] gtools_3.9.4 bslib_0.5.0
## [11] highr_0.10 BSgenome_1.66.3
## [13] GenomeInfoDbData_1.2.9 Rsamtools_2.14.0
## [15] yaml_2.3.7 pillar_1.9.0
## [17] lattice_0.21-8 glue_1.6.2
## [19] limma_3.54.2 digest_0.6.33
## [21] RColorBrewer_1.1-3 XVector_0.38.0
## [23] colorspace_2.1-0 htmltools_0.5.5
## [25] Matrix_1.6-0 R.oo_1.25.0
## [27] XML_3.99-0.14 pkgconfig_2.0.3
## [29] zlibbioc_1.44.0 purrr_1.0.1
## [31] HDF5Array_1.26.0 tibble_3.2.1
## [33] farver_2.1.1 generics_0.1.3
## [35] ellipsis_0.3.2 withr_2.5.0
## [37] cachem_1.0.8 lazyeval_0.2.2
## [39] cli_3.6.1 magrittr_2.0.3
## [41] crayon_1.5.2 evaluate_0.21
## [43] R.methodsS3_1.8.2 fansi_1.0.4
## [45] tools_4.2.1 data.table_1.14.8
## [47] BiocIO_1.8.0 lifecycle_1.0.3
## [49] Rhdf5lib_1.20.0 munsell_0.5.0
## [51] locfit_1.5-9.7 DelayedArray_0.24.0
## [53] Biostrings_2.66.0 compiler_4.2.1
## [55] jquerylib_0.1.4 rlang_1.1.1
## [57] rhdf5_2.42.1 grid_4.2.1
## [59] RCurl_1.98-1.12 rhdf5filters_1.10.1
## [61] rstudioapi_0.15.0 htmlwidgets_1.6.2
## [63] rjson_0.2.21 crosstalk_1.2.0
## [65] labeling_0.4.2 bitops_1.0-7
## [67] rmarkdown_2.23 restfulr_0.0.15
## [69] gtable_0.3.3 codetools_0.2-19
## [71] R6_2.5.1 GenomicAlignments_1.34.1
## [73] knitr_1.43 dplyr_1.1.2
## [75] rtracklayer_1.58.0 fastmap_1.1.1
## [77] utf8_1.2.3 permute_0.9-7
## [79] Rcpp_1.0.11 vctrs_0.6.3
## [81] tidyselect_1.2.0 xfun_0.39
## [83] sparseMatrixStats_1.10.0