library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(bsseq)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(sechm)
source("../Methylation_helper.R")
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
##
## expand
<- params$InputFolder InputFolder
Loading bsseq object
<- readRDS(paste0(InputFolder, "bsseq_obj_escapees.rds")) bsseq_obj_escapees
Loading annotated escapees
<- readRDS("~/DataDir/7.EscapeesExploration/Output/EscapeesAnnotationAndFunctional/AnnotatedEscapees.rds") AnnotatedEscapees
I keep escapee CpGs covered by ALL samples
<- methylSig::filter_loci_by_group_coverage(bs = bsseq_obj_escapees, group_column = "Line", min_samples_per_group = c("CTL08A" = dim(bsseq_obj_escapees)[2])) bsseq_obj_escapees_filt
Now we have 8388 CpGs.
Add annotation made in script 2.EscapeeAnnotationAndFunctional.Rmd
<- makeGRangesFromDataFrame(AnnotatedEscapees, keep.extra.columns = TRUE) AnnotatedEscapees_gr
<- findOverlaps(rowRanges(bsseq_obj_escapees_filt), AnnotatedEscapees_gr)
m
<- bsseq_obj_escapees_filt[queryHits(m),]
bsseq_obj_subset
rowData(bsseq_obj_subset)$ensembl_gene_id <- AnnotatedEscapees_gr[subjectHits(m), ]$ensembl_gene_id
Let’s show the average methylation levels. Instead of plotting for each row an escapee CpGs, I aggregate CpGs by ensembl gene id averaging the methylation levels.
<- aggregate(assays(bsseq_obj_subset)$Meth, list(rowData(bsseq_obj_subset)$ensembl_gene_id), mean, na.rm = T)
AvgMeth rownames(AvgMeth) <- AvgMeth$Group.1
<- AvgMeth[, -1]
AvgMeth
<- rowData(bsseq_obj_subset)
rowDataSE rownames(rowDataSE) <- rowData(bsseq_obj_subset)$ensembl_gene_id
<- rowDataSE[rownames(AvgMeth), ]
rowDataSE
<- colData(bsseq_obj_subset)
SampleAnno
<- SummarizedExperiment(assays=AvgMeth, colData=SampleAnno, rowData = rowDataSE)
SE
identical(colnames(assay(SE)), rownames(colData(SE))) #TRUE
## [1] TRUE
<- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00")
cellTypes_colors colData(SE)$Type <- factor(colData(SE)$Type, levels = c("hiPSCs", "iMeLCs" , "hPGCLCs", "hEGCLCs"))
metadata(SE)$anno_colors <- list(Type = cellTypes_colors,
GenomicRegion = c ("3' UTR" = "#FFADAD", "5' UTR" = "#FFD6A5", "Distal Intergenic" = "#FDFFB6", "Downstream (<=300bp)" = "#FFFFFC", "Exon" = "#9BF6FF", "Intron" = "#A0C4FF", "Promoter (<=1kb)" = "#BDB2FF", "Promoter (1-2kb)" = "#FFC6FF", "Promoter (2-3kb)" = "#CAFFBF"))
<- assay(SE) %>% quantile(seq(0,1, by=0.1), na.rm =TRUE)
break_coord <- viridisLite::plasma(length(seq(head(break_coord,1), tail(break_coord,1), by=0.2)))
ExpCols sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), show_rownames=FALSE, show_colnames=TRUE, do.scale=FALSE, column_title = "Average methylation levels (by gene) of CpGs in escapee regions", top_annotation = c("Type"), row_title_rot = 0, cluster_rows = TRUE)
Let’s change the range of methylation
<- assay(SE) %>% quantile(seq(0,1, by=0.1), na.rm =TRUE)
break_coord <- viridisLite::plasma(length(seq(break_coord[2], tail(break_coord,1), by=0.05)))
ExpCols sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=seq(break_coord[2], tail(break_coord,1), by=0.05), show_rownames=FALSE, show_colnames=TRUE, do.scale=FALSE, column_title = "Average methylation levels (by gene) of CpGs in escapee regions", top_annotation = c("Type"), row_title_rot = 0, cluster_rows = TRUE)
Let’s now show the median methylation levels. Instead of plotting for each row an escapee CpGs, I aggregate CpGs by ensembl gene id doing the median of the methylation levels.
<- aggregate(assays(bsseq_obj_subset)$Meth, list(rowData(bsseq_obj_subset)$ensembl_gene_id), median, na.rm = T)
MedianMeth rownames(MedianMeth) <- MedianMeth$Group.1
<- MedianMeth[, -1]
MedianMeth
<- rowData(bsseq_obj_subset)
rowDataSE rownames(rowDataSE) <- rowData(bsseq_obj_subset)$ensembl_gene_id
<- rowDataSE[rownames(MedianMeth), ]
rowDataSE
<- colData(bsseq_obj_subset)
SampleAnno
<- SummarizedExperiment(assays=MedianMeth, colData=SampleAnno, rowData = rowDataSE)
SE
identical(colnames(assay(SE)), rownames(colData(SE))) #TRUE
## [1] TRUE
<- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00")
cellTypes_colors colData(SE)$Type <- factor(colData(SE)$Type, levels = c("hiPSCs", "iMeLCs" , "hPGCLCs", "hEGCLCs"))
metadata(SE)$anno_colors <- list(Type = cellTypes_colors,
GenomicRegion = c ("3' UTR" = "#FFADAD", "5' UTR" = "#FFD6A5", "Distal Intergenic" = "#FDFFB6", "Downstream (<=300bp)" = "#FFFFFC", "Exon" = "#9BF6FF", "Intron" = "#A0C4FF", "Promoter (<=1kb)" = "#BDB2FF", "Promoter (1-2kb)" = "#FFC6FF", "Promoter (2-3kb)" = "#CAFFBF"))
<- assay(SE) %>% quantile(seq(0,1, by=0.1), na.rm =TRUE)
break_coord <- viridisLite::plasma(length(seq(head(break_coord,1), tail(break_coord,1), by=0.2)))
ExpCols sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), show_rownames=FALSE, show_colnames=TRUE, do.scale=FALSE, column_title = "Median methylation levels (by gene) of CpGs in escapee regions", top_annotation = c("Type"), row_title_rot = 0, cluster_rows = TRUE)
Let’s change the range of methylation (1st-10th deciles of methylation)
<- assay(SE) %>% quantile(seq(0,1, by=0.1), na.rm =TRUE)
break_coord <- viridisLite::plasma(length(seq(break_coord[2], tail(break_coord,1), by=0.05)))
ExpCols sechm(SE, features = rownames(SE), hmcols=ExpCols, breaks=seq(break_coord[2], tail(break_coord,1), by=0.05), show_rownames=FALSE, show_colnames=TRUE, do.scale=FALSE, column_title = "Median methylation levels (by gene) of CpGs in escapee regions", top_annotation = c("Type"), row_title_rot = 0, cluster_rows = TRUE)
date()
## [1] "Tue Jan 28 16:36:07 2025"
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tidyr_1.3.0 sechm_1.6.0
## [3] bsseq_1.34.0 SummarizedExperiment_1.28.0
## [5] Biobase_2.58.0 MatrixGenerics_1.10.0
## [7] matrixStats_1.0.0 GenomicRanges_1.50.2
## [9] GenomeInfoDb_1.34.9 IRanges_2.32.0
## [11] S4Vectors_0.36.2 BiocGenerics_0.44.0
## [13] gridExtra_2.3 scales_1.2.1
## [15] ggplot2_3.4.2 dplyr_1.1.2
##
## loaded via a namespace (and not attached):
## [1] DSS_2.46.0 bitops_1.0-7
## [3] doParallel_1.0.17 RColorBrewer_1.1-3
## [5] tools_4.2.1 bslib_0.5.0
## [7] utf8_1.2.3 R6_2.5.1
## [9] HDF5Array_1.26.0 colorspace_2.1-0
## [11] permute_0.9-7 rhdf5filters_1.10.1
## [13] GetoptLong_1.0.5 withr_2.5.0
## [15] tidyselect_1.2.0 curl_5.0.1
## [17] compiler_4.2.1 cli_3.6.1
## [19] Cairo_1.6-0 methylSig_1.10.0
## [21] TSP_1.2-4 DelayedArray_0.24.0
## [23] rtracklayer_1.58.0 sass_0.4.7
## [25] stringr_1.5.0 digest_0.6.33
## [27] Rsamtools_2.14.0 rmarkdown_2.23
## [29] R.utils_2.12.2 ca_0.71.1
## [31] XVector_0.38.0 pkgconfig_2.0.3
## [33] htmltools_0.5.5 sparseMatrixStats_1.10.0
## [35] highr_0.10 fastmap_1.1.1
## [37] limma_3.54.2 BSgenome_1.66.3
## [39] rlang_1.1.1 GlobalOptions_0.1.2
## [41] rstudioapi_0.15.0 DelayedMatrixStats_1.20.0
## [43] shape_1.4.6 jquerylib_0.1.4
## [45] BiocIO_1.8.0 generics_0.1.3
## [47] jsonlite_1.8.7 BiocParallel_1.32.6
## [49] gtools_3.9.4 R.oo_1.25.0
## [51] RCurl_1.98-1.12 magrittr_2.0.3
## [53] GenomeInfoDbData_1.2.9 Matrix_1.6-0
## [55] Rcpp_1.0.11 munsell_0.5.0
## [57] Rhdf5lib_1.20.0 fansi_1.0.4
## [59] lifecycle_1.0.3 R.methodsS3_1.8.2
## [61] stringi_1.7.12 yaml_2.3.7
## [63] zlibbioc_1.44.0 rhdf5_2.42.1
## [65] Rtsne_0.16 grid_4.2.1
## [67] parallel_4.2.1 randomcoloR_1.1.0.1
## [69] crayon_1.5.2 lattice_0.21-8
## [71] splines_4.2.1 Biostrings_2.66.0
## [73] circlize_0.4.15 locfit_1.5-9.7
## [75] knitr_1.43 ComplexHeatmap_2.14.0
## [77] pillar_1.9.0 rjson_0.2.21
## [79] codetools_0.2-19 XML_3.99-0.14
## [81] glue_1.6.2 evaluate_0.21
## [83] V8_4.3.0 data.table_1.14.8
## [85] vctrs_0.6.3 png_0.1-8
## [87] foreach_1.5.2 purrr_1.0.1
## [89] gtable_0.3.3 clue_0.3-64
## [91] cachem_1.0.8 xfun_0.39
## [93] restfulr_0.0.15 viridisLite_0.4.2
## [95] seriation_1.4.2 tibble_3.2.1
## [97] iterators_1.0.14 registry_0.5-1
## [99] GenomicAlignments_1.34.1 cluster_2.1.4