Vignettes:

N.B. delta=0.25, p.threshold=0.0001

1. Environment setting

library(DSS)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: BiocParallel
## Loading required package: bsseq
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: parallel
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(scales)

source("../Methylation_helper.R")
InputFolder <- params$InputFolder
OutputFolder <- params$OutputFolder

2. Loading of BSSeq object

Loading of bsseq object with CpGs shared by at least 75% of samples.

bsseq_obj <- readRDS("~/DataDir/2.DifferentialAnalysis/Input/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed 

PCA plot

This PCA is done on the new filtered BSseq object (different from the one in the preliminary exploration script!)

cellTypes_colors <- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00")
pca_res <- do_PCA(getMeth(BSseq = bsseq_obj, type = "raw"))
plot_PCA(pca_res = pca_res, anno = pData(bsseq_obj), col_anno = "Type", shape_anno = NULL, custom_colors = cellTypes_colors, point_size = 5)

plot_PCA(pca_res = pca_res, anno = pData(bsseq_obj), col_anno = "Type", shape_anno = NULL, pc_x = "PC2", pc_y = "PC3", custom_colors = cellTypes_colors, point_size = 5)

3. Differential Methylation analysis

Loading results obtained with DSS::DMLtest function

dml_tests_files <-  list.files(path = InputFolder, pattern = '*\\.rds', full.names = TRUE) 
dml_tests_files <- dml_tests_files[-1]

Storing DMRs and DMLs Plotting number of DMRs

nr_tests <- length(dml_tests_files)

DM_tests_Res <- list()
DMR_Res <- list()
DML_Res <- list()

for(i in 1:nr_tests){
  
  DM_tests_Res[[i]] <- readRDS(dml_tests_files[i])
  DMR_Res[[i]] <- list()
  DML_Res[[i]] <- list()
  
  names(DM_tests_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
  names(DMR_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
  names(DML_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
  
  DMR_Res[[i]] <- callDMR(DM_tests_Res[[i]], delta=0.25, p.threshold=0.0001)
  DML_Res[[i]] <- callDML(DM_tests_Res[[i]], delta=0.25, p.threshold=0.0001)
  
  }
DMRbarplot(DMR=DMR_Res, comparison=1, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=1)
DMRbarplot(DMR=DMR_Res, comparison=3, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=3)
DMRbarplot(DMR=DMR_Res, comparison=4, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=4)
DMRbarplot(DMR=DMR_Res, comparison=5, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=5)
DMRbarplot(DMR=DMR_Res, comparison=6, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=6)
DMRbarplot(DMR=DMR_Res, comparison=7, y_lim = c(0,2400), interactive=TRUE)
DMRbarplot_length(DMR=DMR_Res, comparison=7)

Barplot with all together

comparisons <- c("hiPSCsvsiMeLCs", "iMeLCsvshPGCLCs", "hiPSCsvshPGCLCs", "hEGCLCsvshPGCLCs", "hEGCLCsvsiMeLCs", "hEGCLCsvshiPSCs") #ordered with criteria

new_ylabel <- c("hiPSCsvsiMeLCs" = "iMeLCs vs hiPSCs", 
                "iMeLCsvshPGCLCs" = "hPGCLCs vs iMeLCs", 
                "hiPSCsvshPGCLCs" = "hPGCLCs vs hiPSCs",
                "hEGCLCsvshPGCLCs" = "hPGCLCs vs hEGCLCs",
                "hEGCLCsvsiMeLCs" = "iMeLCs vs hEGCLCs",
                "hEGCLCsvshiPSCs" = "hiPSCs vs hEGCLCs")

DMRbarplot_all(DMR=DMR_Res, comparisons_ordered=comparisons, label_y=new_ylabel, y_lim = 2400, interactive=TRUE)

4. Interesting FAQ

N.B: Q: Why doesn’t callDMR function return FDR for identified DMRs?

A: In DMR calling, the statistical test is perform for each CpG, and then the significant CpG are merged into regions. It is very difficult to estimate FDR at the region-level, based on the site level test results (p-values or FDR). This is a difficult question for all types of genome-wide assays such as the ChIP-seq. So I rather not to report a DMR-level FDR if it’s not accurate.

5. Saving DMRs and DMLs and Session Info

saveRDS(DM_tests_Res, paste0(InputFolder, '/', 'DML_test_results.rds'))
saveRDS(DMR_Res, paste0(OutputFolder, '/', 'DMRs.rds'))
saveRDS(DML_Res, paste0(OutputFolder, '/', 'DMLs.rds'))
SessionInfo <- sessionInfo()
Date <- date()
Date
## [1] "Thu Jan  9 18:10:07 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] scales_1.2.1                plotly_4.10.2              
##  [3] ggplot2_3.4.2               DSS_2.46.0                 
##  [5] bsseq_1.34.0                SummarizedExperiment_1.28.0
##  [7] MatrixGenerics_1.10.0       matrixStats_1.0.0          
##  [9] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [11] IRanges_2.32.0              S4Vectors_0.36.2           
## [13] BiocParallel_1.32.6         Biobase_2.58.0             
## [15] BiocGenerics_0.44.0        
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.6                tidyr_1.3.0              
##  [3] sass_0.4.7                viridisLite_0.4.2        
##  [5] jsonlite_1.8.7            splines_4.2.1            
##  [7] DelayedMatrixStats_1.20.0 R.utils_2.12.2           
##  [9] gtools_3.9.4              bslib_0.5.0              
## [11] highr_0.10                BSgenome_1.66.3          
## [13] GenomeInfoDbData_1.2.9    Rsamtools_2.14.0         
## [15] yaml_2.3.7                pillar_1.9.0             
## [17] lattice_0.21-8            glue_1.6.2               
## [19] limma_3.54.2              digest_0.6.33            
## [21] RColorBrewer_1.1-3        XVector_0.38.0           
## [23] colorspace_2.1-0          htmltools_0.5.5          
## [25] Matrix_1.6-0              R.oo_1.25.0              
## [27] XML_3.99-0.14             pkgconfig_2.0.3          
## [29] zlibbioc_1.44.0           purrr_1.0.1              
## [31] HDF5Array_1.26.0          tibble_3.2.1             
## [33] farver_2.1.1              generics_0.1.3           
## [35] ellipsis_0.3.2            withr_2.5.0              
## [37] cachem_1.0.8              lazyeval_0.2.2           
## [39] cli_3.6.1                 magrittr_2.0.3           
## [41] crayon_1.5.2              evaluate_0.21            
## [43] R.methodsS3_1.8.2         fansi_1.0.4              
## [45] tools_4.2.1               data.table_1.14.8        
## [47] BiocIO_1.8.0              lifecycle_1.0.3          
## [49] Rhdf5lib_1.20.0           munsell_0.5.0            
## [51] locfit_1.5-9.7            DelayedArray_0.24.0      
## [53] Biostrings_2.66.0         compiler_4.2.1           
## [55] jquerylib_0.1.4           rlang_1.1.1              
## [57] rhdf5_2.42.1              grid_4.2.1               
## [59] RCurl_1.98-1.12           rhdf5filters_1.10.1      
## [61] rstudioapi_0.15.0         htmlwidgets_1.6.2        
## [63] rjson_0.2.21              crosstalk_1.2.0          
## [65] labeling_0.4.2            bitops_1.0-7             
## [67] rmarkdown_2.23            restfulr_0.0.15          
## [69] gtable_0.3.3              codetools_0.2-19         
## [71] R6_2.5.1                  GenomicAlignments_1.34.1 
## [73] knitr_1.43                dplyr_1.1.2              
## [75] rtracklayer_1.58.0        fastmap_1.1.1            
## [77] utf8_1.2.3                permute_0.9-7            
## [79] Rcpp_1.0.11               vctrs_0.6.3              
## [81] tidyselect_1.2.0          xfun_0.39                
## [83] sparseMatrixStats_1.10.0