1. Environment setting

library(ggplot2)
library(scales)
library(gridExtra)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dmrseq)
## Loading required package: bsseq
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(annotatr)
library(sechm)

2. Loading data

2.1 BSSeq objects

Loading of bsseq object.

bsseq_obj <- readRDS("~/DataDir/1.PreliminaryAnalysis/Output/WithSelectedEGCLCs/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed 

2.2 Get imprinted regions

ImprintedRegions <- readxl::read_xlsx("~/DataDir/5.DMRInterpretation/imprinted_region_all_for_meth.xlsx")

#Removal of regions for which genomic positions were not retrieved
ImprintedRegions <- ImprintedRegions[!is.na(ImprintedRegions$Start), ]

ImprintedRegions_GRanges <- makeGRangesFromDataFrame(ImprintedRegions, seqnames.field = "Chr", start.field = "Start", end.field = "End", keep.extra.columns = TRUE)

3. Heatmaps

These heatmaps show the methylation levels of all the CpGs we covered that fall within the imprinted regions.

m <- findOverlaps(rowRanges(bsseq_obj), ImprintedRegions_GRanges)

bsseq_obj_subset <- bsseq_obj[queryHits(m),]

rowData(bsseq_obj_subset)$Gene <- mcols(ImprintedRegions_GRanges[subjectHits(m), ])$"Gene locus"
rowData(bsseq_obj_subset)$DMRStatus <- mcols(ImprintedRegions_GRanges[subjectHits(m), ])$DMRStatus
#rowData(bsseq_obj_subset)$Reference <- mcols(ImprintedRegions_GRanges[subjectHits(m), ])$Reference
assays(bsseq_obj_subset)$Meth <- getMeth(bsseq_obj_subset, type = "raw")

ScaledCols <- c('darkblue', "purple", "white", "lightgoldenrod1", 'goldenrod1')

cellTypes_colors <- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00")
colData(bsseq_obj_subset)$Type <- factor(colData(bsseq_obj_subset)$Type, levels = c("hiPSCs", "iMeLCs" , "hPGCLCs", "hEGCLCs"))

metadata(bsseq_obj_subset)$anno_colors <- list(Type = cellTypes_colors,
                                               GenomicRegion = c ("3' UTR" = "#FFADAD", "5' UTR" = "#FFD6A5", "Distal Intergenic" = "#FDFFB6", "Downstream (<=300bp)" = "#FFFFFC", "Exon" = "#9BF6FF", "Intron" = "#A0C4FF", "Promoter (<=1kb)" = "#BDB2FF", "Promoter (1-2kb)" = "#FFC6FF", "Promoter (2-3kb)" = "#CAFFBF"))

sample_colors <- c("hiPSC_rep1" = "#dd1c77", "hiPSC_rep2" = "#dd1c77", "hiPSC_rep3" = "#dd1c77", "hiPSC_rep4" = "#dd1c77",
                   "iMeLC_rep1" = "#377eb8", "iMeLC_rep2" = "#377eb8", "iMeLC_rep3" = "#377eb8", "iMeLC_rep4" = "#377eb8",
                   "hPGCLC_rep1" = "#4daf4a", "hPGCLC_rep2" = "#4daf4a", "hPGCLC_rep3" = "#4daf4a",
                   "hEGCLC_rep1" = "#ff7f00", "hEGCLC_rep2" = "#ff7f00", "hEGCLC_rep3" = "#ff7f00")

rownames(bsseq_obj_subset) <- c(1:nrow(bsseq_obj_subset))

In total we have 5962 CpGs in all the imprinted regions.

3.1 Methylation levels (in %)

This shows all CpGs as rows.

break_coord <- assays(bsseq_obj_subset)$Meth %>% quantile(seq(0,1, by=0.1), na.rm =TRUE)
ExpCols <- viridisLite::plasma(length(seq(head(break_coord,1), tail(break_coord,1), by=0.2)))

sechm(bsseq_obj_subset, features = rownames(bsseq_obj_subset), assayName="Meth", hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), show_colnames=TRUE, do.scale=FALSE, column_title = "Methylation levels of all CpGs in imprinted regions", top_annotation = c("Type"), left_annotation = c("Gene"), gaps_row = "Gene", row_title_rot = 0) 
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

This shows the average methylation levels.

AvgMeth <- aggregate(assays(bsseq_obj_subset)$Meth, list(rowData(bsseq_obj_subset)$Gene), mean, na.rm = T)
rownames(AvgMeth) <- AvgMeth$Group.1
AvgMeth <- AvgMeth[, -1]

rowDataSE <- rowData(bsseq_obj_subset)
rownames(rowDataSE) <- rowData(bsseq_obj_subset)$Gene
rowDataSE <- rowDataSE[rownames(AvgMeth), ] 
SE <- SummarizedExperiment(assays=AvgMeth, colData=colData(bsseq_obj_subset), rowData = rowDataSE, )

identical(rownames(assay(SE)), rownames(rowData(SE))) #TRUE
## [1] TRUE

metadata(SE)$anno_colors <- list(Type = cellTypes_colors,
                                               GenomicRegion = c ("3' UTR" = "#FFADAD", "5' UTR" = "#FFD6A5", "Distal Intergenic" = "#FDFFB6", "Downstream (<=300bp)" = "#FFFFFC", "Exon" = "#9BF6FF", "Intron" = "#A0C4FF", "Promoter (<=1kb)" = "#BDB2FF", "Promoter (1-2kb)" = "#FFC6FF", "Promoter (2-3kb)" = "#CAFFBF"))
sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), show_rownames=TRUE, show_colnames=TRUE, do.scale=FALSE, column_title = "Average methylation levels of CpGs in imprinted regions", top_annotation = c("Type"), row_title_rot = 0) 

Clustering by row

sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), show_rownames=TRUE, show_colnames=TRUE, do.scale=FALSE, column_title = "Average methylation levels of CpGs in imprinted regions", top_annotation = c("Type"), row_title_rot = 0, cluster_rows = TRUE) 

Sechm removes genes with a NaN value in at least one sample…. Let’s plot also these genes with black box in case of NaNs.

if(any(is.na(assay(SE)))){
  assay(SE)[is.na(assay(SE))] <- -1
}

ExpCols <- c("black", ExpCols)

sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=c(-1, seq(head(break_coord,1), tail(break_coord,1), by=0.2)), show_rownames = TRUE, show_colnames=TRUE, do.scale=FALSE, column_title = "Average methylation levels of CpGs in imprinted regions (with NaN)", top_annotation = c("Type"), row_title_rot = 0) 

3. Session Info

SessionInfo <- sessionInfo()
Date <- date()
Date
## [1] "Mon Jan 13 11:59:22 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] sechm_1.6.0                 annotatr_1.24.0            
##  [3] dmrseq_1.18.1               bsseq_1.34.0               
##  [5] SummarizedExperiment_1.28.0 Biobase_2.58.0             
##  [7] MatrixGenerics_1.10.0       matrixStats_1.0.0          
##  [9] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [11] IRanges_2.32.0              S4Vectors_0.36.2           
## [13] BiocGenerics_0.44.0         dplyr_1.1.2                
## [15] gridExtra_2.3               scales_1.2.1               
## [17] ggplot2_3.4.2              
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.3                  circlize_0.4.15              
##   [3] AnnotationHub_3.6.0           BiocFileCache_2.6.1          
##   [5] plyr_1.8.8                    splines_4.2.1                
##   [7] BiocParallel_1.32.6           digest_0.6.33                
##   [9] ca_0.71.1                     foreach_1.5.2                
##  [11] htmltools_0.5.5               fansi_1.0.4                  
##  [13] magrittr_2.0.3                memoise_2.0.1                
##  [15] BSgenome_1.66.3               cluster_2.1.4                
##  [17] doParallel_1.0.17             tzdb_0.4.0                   
##  [19] limma_3.54.2                  ComplexHeatmap_2.14.0        
##  [21] Biostrings_2.66.0             readr_2.1.4                  
##  [23] R.utils_2.12.2                prettyunits_1.1.1            
##  [25] colorspace_2.1-0              blob_1.2.4                   
##  [27] rappdirs_0.3.3                xfun_0.39                    
##  [29] crayon_1.5.2                  RCurl_1.98-1.12              
##  [31] jsonlite_1.8.7                iterators_1.0.14             
##  [33] glue_1.6.2                    registry_0.5-1               
##  [35] gtable_0.3.3                  zlibbioc_1.44.0              
##  [37] XVector_0.38.0                V8_4.3.0                     
##  [39] GetoptLong_1.0.5              DelayedArray_0.24.0          
##  [41] Rhdf5lib_1.20.0               shape_1.4.6                  
##  [43] HDF5Array_1.26.0              DBI_1.1.3                    
##  [45] rngtools_1.5.2                randomcoloR_1.1.0.1          
##  [47] Rcpp_1.0.11                   viridisLite_0.4.2            
##  [49] xtable_1.8-4                  progress_1.2.2               
##  [51] clue_0.3-64                   bumphunter_1.40.0            
##  [53] bit_4.0.5                     httr_1.4.6                   
##  [55] RColorBrewer_1.1-3            ellipsis_0.3.2               
##  [57] pkgconfig_2.0.3               XML_3.99-0.14                
##  [59] R.methodsS3_1.8.2             sass_0.4.7                   
##  [61] dbplyr_2.3.3                  locfit_1.5-9.7               
##  [63] utf8_1.2.3                    tidyselect_1.2.0             
##  [65] rlang_1.1.1                   reshape2_1.4.4               
##  [67] later_1.3.1                   AnnotationDbi_1.60.2         
##  [69] cellranger_1.1.0              munsell_0.5.0                
##  [71] BiocVersion_3.16.0            tools_4.2.1                  
##  [73] cachem_1.0.8                  cli_3.6.1                    
##  [75] generics_0.1.3                RSQLite_2.3.1                
##  [77] evaluate_0.21                 stringr_1.5.0                
##  [79] fastmap_1.1.1                 yaml_2.3.7                   
##  [81] outliers_0.15                 knitr_1.43                   
##  [83] bit64_4.0.5                   KEGGREST_1.38.0              
##  [85] nlme_3.1-162                  doRNG_1.8.6                  
##  [87] sparseMatrixStats_1.10.0      mime_0.12                    
##  [89] R.oo_1.25.0                   xml2_1.3.5                   
##  [91] biomaRt_2.54.1                compiler_4.2.1               
##  [93] rstudioapi_0.15.0             filelock_1.0.2               
##  [95] curl_5.0.1                    png_0.1-8                    
##  [97] interactiveDisplayBase_1.36.0 tibble_3.2.1                 
##  [99] bslib_0.5.0                   stringi_1.7.12               
## [101] highr_0.10                    GenomicFeatures_1.50.4       
## [103] lattice_0.21-8                Matrix_1.6-0                 
## [105] permute_0.9-7                 vctrs_0.6.3                  
## [107] pillar_1.9.0                  lifecycle_1.0.3              
## [109] rhdf5filters_1.10.1           BiocManager_1.30.20          
## [111] GlobalOptions_0.1.2           jquerylib_0.1.4              
## [113] data.table_1.14.8             bitops_1.0-7                 
## [115] seriation_1.4.2               httpuv_1.6.11                
## [117] rtracklayer_1.58.0            R6_2.5.1                     
## [119] BiocIO_1.8.0                  TSP_1.2-4                    
## [121] promises_1.2.0.1              codetools_0.2-19             
## [123] gtools_3.9.4                  rhdf5_2.42.1                 
## [125] rjson_0.2.21                  withr_2.5.0                  
## [127] regioneR_1.30.0               GenomicAlignments_1.34.1     
## [129] Rsamtools_2.14.0              GenomeInfoDbData_1.2.9       
## [131] parallel_4.2.1                hms_1.1.3                    
## [133] grid_4.2.1                    rmarkdown_2.23               
## [135] DelayedMatrixStats_1.20.0     Cairo_1.6-0                  
## [137] Rtsne_0.16                    shiny_1.7.4.1                
## [139] restfulr_0.0.15