library(ggplot2)
library(scales)
library(gridExtra)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dmrseq)
## Loading required package: bsseq
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(annotatr)
library(sechm)
Loading of bsseq object.
<- readRDS("~/DataDir/1.PreliminaryAnalysis/Output/WithSelectedEGCLCs/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed bsseq_obj
<- readxl::read_xlsx("~/DataDir/5.DMRInterpretation/imprinted_region_all_for_meth.xlsx")
ImprintedRegions
#Removal of regions for which genomic positions were not retrieved
<- ImprintedRegions[!is.na(ImprintedRegions$Start), ]
ImprintedRegions
<- makeGRangesFromDataFrame(ImprintedRegions, seqnames.field = "Chr", start.field = "Start", end.field = "End", keep.extra.columns = TRUE) ImprintedRegions_GRanges
These heatmaps show the methylation levels of all the CpGs we covered that fall within the imprinted regions.
<- findOverlaps(rowRanges(bsseq_obj), ImprintedRegions_GRanges)
m
<- bsseq_obj[queryHits(m),]
bsseq_obj_subset
rowData(bsseq_obj_subset)$Gene <- mcols(ImprintedRegions_GRanges[subjectHits(m), ])$"Gene locus"
rowData(bsseq_obj_subset)$DMRStatus <- mcols(ImprintedRegions_GRanges[subjectHits(m), ])$DMRStatus
#rowData(bsseq_obj_subset)$Reference <- mcols(ImprintedRegions_GRanges[subjectHits(m), ])$Reference
assays(bsseq_obj_subset)$Meth <- getMeth(bsseq_obj_subset, type = "raw")
<- c('darkblue', "purple", "white", "lightgoldenrod1", 'goldenrod1')
ScaledCols
<- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00")
cellTypes_colors colData(bsseq_obj_subset)$Type <- factor(colData(bsseq_obj_subset)$Type, levels = c("hiPSCs", "iMeLCs" , "hPGCLCs", "hEGCLCs"))
metadata(bsseq_obj_subset)$anno_colors <- list(Type = cellTypes_colors,
GenomicRegion = c ("3' UTR" = "#FFADAD", "5' UTR" = "#FFD6A5", "Distal Intergenic" = "#FDFFB6", "Downstream (<=300bp)" = "#FFFFFC", "Exon" = "#9BF6FF", "Intron" = "#A0C4FF", "Promoter (<=1kb)" = "#BDB2FF", "Promoter (1-2kb)" = "#FFC6FF", "Promoter (2-3kb)" = "#CAFFBF"))
<- c("hiPSC_rep1" = "#dd1c77", "hiPSC_rep2" = "#dd1c77", "hiPSC_rep3" = "#dd1c77", "hiPSC_rep4" = "#dd1c77",
sample_colors "iMeLC_rep1" = "#377eb8", "iMeLC_rep2" = "#377eb8", "iMeLC_rep3" = "#377eb8", "iMeLC_rep4" = "#377eb8",
"hPGCLC_rep1" = "#4daf4a", "hPGCLC_rep2" = "#4daf4a", "hPGCLC_rep3" = "#4daf4a",
"hEGCLC_rep1" = "#ff7f00", "hEGCLC_rep2" = "#ff7f00", "hEGCLC_rep3" = "#ff7f00")
rownames(bsseq_obj_subset) <- c(1:nrow(bsseq_obj_subset))
In total we have 5962 CpGs in all the imprinted regions.
This shows all CpGs as rows.
<- assays(bsseq_obj_subset)$Meth %>% quantile(seq(0,1, by=0.1), na.rm =TRUE)
break_coord <- viridisLite::plasma(length(seq(head(break_coord,1), tail(break_coord,1), by=0.2)))
ExpCols
sechm(bsseq_obj_subset, features = rownames(bsseq_obj_subset), assayName="Meth", hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), show_colnames=TRUE, do.scale=FALSE, column_title = "Methylation levels of all CpGs in imprinted regions", top_annotation = c("Type"), left_annotation = c("Gene"), gaps_row = "Gene", row_title_rot = 0)
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
This shows the average methylation levels.
<- aggregate(assays(bsseq_obj_subset)$Meth, list(rowData(bsseq_obj_subset)$Gene), mean, na.rm = T)
AvgMeth rownames(AvgMeth) <- AvgMeth$Group.1
<- AvgMeth[, -1]
AvgMeth
<- rowData(bsseq_obj_subset)
rowDataSE rownames(rowDataSE) <- rowData(bsseq_obj_subset)$Gene
<- rowDataSE[rownames(AvgMeth), ]
rowDataSE <- SummarizedExperiment(assays=AvgMeth, colData=colData(bsseq_obj_subset), rowData = rowDataSE, )
SE
identical(rownames(assay(SE)), rownames(rowData(SE))) #TRUE
## [1] TRUE
metadata(SE)$anno_colors <- list(Type = cellTypes_colors,
GenomicRegion = c ("3' UTR" = "#FFADAD", "5' UTR" = "#FFD6A5", "Distal Intergenic" = "#FDFFB6", "Downstream (<=300bp)" = "#FFFFFC", "Exon" = "#9BF6FF", "Intron" = "#A0C4FF", "Promoter (<=1kb)" = "#BDB2FF", "Promoter (1-2kb)" = "#FFC6FF", "Promoter (2-3kb)" = "#CAFFBF"))
sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), show_rownames=TRUE, show_colnames=TRUE, do.scale=FALSE, column_title = "Average methylation levels of CpGs in imprinted regions", top_annotation = c("Type"), row_title_rot = 0)
Clustering by row
sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), show_rownames=TRUE, show_colnames=TRUE, do.scale=FALSE, column_title = "Average methylation levels of CpGs in imprinted regions", top_annotation = c("Type"), row_title_rot = 0, cluster_rows = TRUE)
Sechm removes genes with a NaN value in at least one sample…. Let’s plot also these genes with black box in case of NaNs.
if(any(is.na(assay(SE)))){
assay(SE)[is.na(assay(SE))] <- -1
}
<- c("black", ExpCols)
ExpCols
sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=c(-1, seq(head(break_coord,1), tail(break_coord,1), by=0.2)), show_rownames = TRUE, show_colnames=TRUE, do.scale=FALSE, column_title = "Average methylation levels of CpGs in imprinted regions (with NaN)", top_annotation = c("Type"), row_title_rot = 0)
<- sessionInfo()
SessionInfo <- date() Date
Date## [1] "Mon Jan 13 11:59:22 2025"
SessionInfo## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] sechm_1.6.0 annotatr_1.24.0
## [3] dmrseq_1.18.1 bsseq_1.34.0
## [5] SummarizedExperiment_1.28.0 Biobase_2.58.0
## [7] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [9] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
## [11] IRanges_2.32.0 S4Vectors_0.36.2
## [13] BiocGenerics_0.44.0 dplyr_1.1.2
## [15] gridExtra_2.3 scales_1.2.1
## [17] ggplot2_3.4.2
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.3 circlize_0.4.15
## [3] AnnotationHub_3.6.0 BiocFileCache_2.6.1
## [5] plyr_1.8.8 splines_4.2.1
## [7] BiocParallel_1.32.6 digest_0.6.33
## [9] ca_0.71.1 foreach_1.5.2
## [11] htmltools_0.5.5 fansi_1.0.4
## [13] magrittr_2.0.3 memoise_2.0.1
## [15] BSgenome_1.66.3 cluster_2.1.4
## [17] doParallel_1.0.17 tzdb_0.4.0
## [19] limma_3.54.2 ComplexHeatmap_2.14.0
## [21] Biostrings_2.66.0 readr_2.1.4
## [23] R.utils_2.12.2 prettyunits_1.1.1
## [25] colorspace_2.1-0 blob_1.2.4
## [27] rappdirs_0.3.3 xfun_0.39
## [29] crayon_1.5.2 RCurl_1.98-1.12
## [31] jsonlite_1.8.7 iterators_1.0.14
## [33] glue_1.6.2 registry_0.5-1
## [35] gtable_0.3.3 zlibbioc_1.44.0
## [37] XVector_0.38.0 V8_4.3.0
## [39] GetoptLong_1.0.5 DelayedArray_0.24.0
## [41] Rhdf5lib_1.20.0 shape_1.4.6
## [43] HDF5Array_1.26.0 DBI_1.1.3
## [45] rngtools_1.5.2 randomcoloR_1.1.0.1
## [47] Rcpp_1.0.11 viridisLite_0.4.2
## [49] xtable_1.8-4 progress_1.2.2
## [51] clue_0.3-64 bumphunter_1.40.0
## [53] bit_4.0.5 httr_1.4.6
## [55] RColorBrewer_1.1-3 ellipsis_0.3.2
## [57] pkgconfig_2.0.3 XML_3.99-0.14
## [59] R.methodsS3_1.8.2 sass_0.4.7
## [61] dbplyr_2.3.3 locfit_1.5-9.7
## [63] utf8_1.2.3 tidyselect_1.2.0
## [65] rlang_1.1.1 reshape2_1.4.4
## [67] later_1.3.1 AnnotationDbi_1.60.2
## [69] cellranger_1.1.0 munsell_0.5.0
## [71] BiocVersion_3.16.0 tools_4.2.1
## [73] cachem_1.0.8 cli_3.6.1
## [75] generics_0.1.3 RSQLite_2.3.1
## [77] evaluate_0.21 stringr_1.5.0
## [79] fastmap_1.1.1 yaml_2.3.7
## [81] outliers_0.15 knitr_1.43
## [83] bit64_4.0.5 KEGGREST_1.38.0
## [85] nlme_3.1-162 doRNG_1.8.6
## [87] sparseMatrixStats_1.10.0 mime_0.12
## [89] R.oo_1.25.0 xml2_1.3.5
## [91] biomaRt_2.54.1 compiler_4.2.1
## [93] rstudioapi_0.15.0 filelock_1.0.2
## [95] curl_5.0.1 png_0.1-8
## [97] interactiveDisplayBase_1.36.0 tibble_3.2.1
## [99] bslib_0.5.0 stringi_1.7.12
## [101] highr_0.10 GenomicFeatures_1.50.4
## [103] lattice_0.21-8 Matrix_1.6-0
## [105] permute_0.9-7 vctrs_0.6.3
## [107] pillar_1.9.0 lifecycle_1.0.3
## [109] rhdf5filters_1.10.1 BiocManager_1.30.20
## [111] GlobalOptions_0.1.2 jquerylib_0.1.4
## [113] data.table_1.14.8 bitops_1.0-7
## [115] seriation_1.4.2 httpuv_1.6.11
## [117] rtracklayer_1.58.0 R6_2.5.1
## [119] BiocIO_1.8.0 TSP_1.2-4
## [121] promises_1.2.0.1 codetools_0.2-19
## [123] gtools_3.9.4 rhdf5_2.42.1
## [125] rjson_0.2.21 withr_2.5.0
## [127] regioneR_1.30.0 GenomicAlignments_1.34.1
## [129] Rsamtools_2.14.0 GenomeInfoDbData_1.2.9
## [131] parallel_4.2.1 hms_1.1.3
## [133] grid_4.2.1 rmarkdown_2.23
## [135] DelayedMatrixStats_1.20.0 Cairo_1.6-0
## [137] Rtsne_0.16 shiny_1.7.4.1
## [139] restfulr_0.0.15