library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(bsseq)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:plyr':
## 
##     rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:plyr':
## 
##     desc
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## The following object is masked from 'package:plyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(dmrseq)
library(ggplot2)
source("../Methylation_helper.R")
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand

Set random seed and number of permutations

set.seed(123)
num_permutations <- params$num_permutations

1. Data loading and fast exploration by PCA

Loading bsseq objects

bsseq_obj <- readRDS("~/DataDir/1.PreliminaryAnalysis/Output/WithSelectedEGCLCs/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed 
bsseq_obj_escapees <- readRDS(paste0(params$InputFolder, "bsseq_obj_escapees.rds"))
bsseq_obj_NonEscapees <- readRDS(paste0(params$InputFolder, "bsseq_obj_NonEscapees.rds"))
  • Our bsseq object contain 3680616 CpG loci.
  • Among them 9113 CpG loci belong to escapees regions (escapee CpGs).
  • While 3671503 CpG loci do not belong to escapees regions (non-escapee CpGs).

Colors for figures

cellTypes_colors <- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00")

PCA only on escapee CpGs

pca_res <- do_PCA(getMeth(bsseq_obj_escapees, type = "raw"))
sample_anno <- bsseq::pData(bsseq_obj_escapees)
plot_PCA(pca_res = pca_res, anno = sample_anno, col_anno = "Type", shape_anno = NULL, custom_colors = cellTypes_colors, point_size = 5)

PCA only on non-escapee CpGs

pca_res <- do_PCA(getMeth(bsseq_obj_NonEscapees, type = "raw"))
sample_anno <- bsseq::pData(bsseq_obj_NonEscapees)
plot_PCA(pca_res = pca_res, anno = sample_anno, col_anno = "Type", shape_anno = NULL, custom_colors = cellTypes_colors, point_size = 5)

As expected it resembles the PCA done on the complete one, since we removed only 9113 out of a total of 3680616 CpG loci.

2.Generation of 500 random sets of 9113 non-escapee CpGs and distributions plots

Let’s generate random indices to randomly select 9113 CpG loci among the non-escapee ones.

random_sets <- list()

for (i in 1:num_permutations) {
  random_indices <- sample(1:dim(bsseq_obj_NonEscapees)[1], dim(bsseq_obj_escapees)[1], replace = FALSE)
  bsseq_obj_randomNonEscapees <- bsseq_obj_NonEscapees[random_indices, ]
  
  random_sets[[i]] <- bsseq_obj_randomNonEscapees
}

Head of BSSEQ with escapee CpGs

getMeth(bsseq_obj_escapees, type = "raw") %>% head()
##      hiPSC_rep1 hiPSC_rep2 hiPSC_rep3 hiPSC_rep4 iMeLC_rep1 iMeLC_rep2
## [1,]  0.9545455  0.9473684  0.9482759  0.9821429  0.9767442  0.9813084
## [2,]  1.0000000  0.9803922  0.9615385  0.9800000  0.9714286  0.9622642
## [3,]  0.8684211  0.9298246  0.7313433  0.8985507  0.9090909  0.7758621
## [4,]  0.8888889  0.9166667  0.8666667  0.9090909  0.9600000  0.8198198
## [5,]  0.7500000  0.8431373  0.8163265  0.7083333  0.8372093  0.8461538
## [6,]  0.8000000  0.8095238  0.8260870  0.8260870  0.8750000  0.6551724
##      iMeLC_rep3 iMeLC_rep4 hPGCLC_rep1 hPGCLC_rep2 hPGCLC_rep3 hEGCLC_rep1
## [1,]  0.8909091  0.9534884   0.9062500   1.0000000   0.9428571   0.9230769
## [2,]  0.9230769  0.9032258   0.9600000   0.9545455   0.9677419   0.9850746
## [3,]  0.7608696  0.8666667   0.8108108   0.6969697   0.7948718   0.8426966
## [4,]  0.8297872  0.8666667   0.8437500   0.7741935   0.8837209   0.8571429
## [5,]  0.7894737  0.7333333   0.7307692   0.7272727   0.8378378   0.7333333
## [6,]  0.9000000  0.8500000   0.8750000   0.8947368   0.6944444   0.7812500
##      hEGCLC_rep2 hEGCLC_rep3
## [1,]   0.9081633   0.9318182
## [2,]   0.9484536   0.9594595
## [3,]   0.8750000   0.8400000
## [4,]   0.8450704   0.8311688
## [5,]   0.6949153   0.8730159
## [6,]   0.7692308   0.8888889

Escapee CpG methylation distribution plot

plotEmpiricalDistribution(bsseq_obj_escapees, 
                          bySample = TRUE,
                          testCovariate = "Type",
                          adj = 3) + guides(linetype="none") + 
  labs(title = "Escapee CpG methylation distribution by cell type") + ggplot2::scale_color_manual(values = cellTypes_colors)

plotEmpiricalDistribution(bsseq_obj_escapees, 
                          bySample = FALSE,
                          testCovariate = "Line",
                          adj = 3) + guides(linetype="none") + 
  labs(title = "Escapee CpG methylation distribution collapsing cell types")

Non-Escapee CpG methylation distribution plot

plotEmpiricalDistribution(bsseq_obj_NonEscapees, 
                          bySample = TRUE,
                          testCovariate = "Type",
                          adj = 3) + guides(linetype="none") + 
  labs(title = "Non-escapee CpG methylation distribution by cell type") + ggplot2::scale_color_manual(values = cellTypes_colors)

plotEmpiricalDistribution(bsseq_obj_NonEscapees, 
                          bySample = FALSE,
                          testCovariate = "Line",
                          adj = 3) + guides(linetype="none") + 
  labs(title = "Non-escapee CpG methylation distribution collapsing cell types")

Head of first BSSEQ with randomly selected non-escapee CpGs

getMeth(random_sets[[1]], type = "raw") %>% head()
##      hiPSC_rep1 hiPSC_rep2 hiPSC_rep3 hiPSC_rep4 iMeLC_rep1 iMeLC_rep2
## [1,]  1.0000000 0.95454545 0.93548387 0.90909091  0.9130435 0.87804878
## [2,]  0.8823529 0.94444444 0.90476190 0.87500000  0.9000000 0.84848485
## [3,]  0.3055556 0.13333333 0.21428571 0.21212121  0.2558140 0.25316456
## [4,]  0.1794872 0.08196721 0.08163265 0.06382979  0.1052632 0.08860759
## [5,]  0.7586207 0.96551724 1.00000000 0.94117647  0.8846154 0.88679245
## [6,]  0.9843750 0.96666667 0.93877551 0.96212121  0.9753086 0.97972973
##      iMeLC_rep3 iMeLC_rep4 hPGCLC_rep1 hPGCLC_rep2 hPGCLC_rep3 hEGCLC_rep1
## [1,]  0.9583333  1.0000000  0.88888889   0.9642857   0.9487179   0.9743590
## [2,]  0.8181818  0.7419355  0.86363636   0.9230769   0.9200000   0.8205128
## [3,]  0.1578947  0.1785714  0.20512821   0.1600000   0.1777778   0.2187500
## [4,]  0.1707317  0.1000000  0.02631579   0.1470588   0.1428571   0.1750000
## [5,]  0.8372093  0.7837838  0.83333333   0.7000000   0.6976744   0.9117647
## [6,]  1.0000000  0.9393939  0.96363636   0.9677419   0.9545455   0.9375000
##      hEGCLC_rep2 hEGCLC_rep3
## [1,]   0.8965517  0.94444444
## [2,]   0.7000000  0.82926829
## [3,]   0.1971831  0.20652174
## [4,]   0.1923077  0.07042254
## [5,]   0.9411765  0.84210526
## [6,]   1.0000000  0.95833333

First set of randomly selected non-escapee CpG methylation distribution plot

plotEmpiricalDistribution(random_sets[[1]], 
                          bySample = TRUE,
                          testCovariate = "Type",
                          adj = 3) + guides(linetype="none") + 
  labs(title = "Non-escapee CpG methylation distribution by cell type (1)") + ggplot2::scale_color_manual(values = cellTypes_colors)

Head of second BSSEQ with randomly selected non-escapee CpGs

getMeth(random_sets[[2]], type = "raw") %>% head()
##      hiPSC_rep1 hiPSC_rep2 hiPSC_rep3 hiPSC_rep4 iMeLC_rep1 iMeLC_rep2
## [1,]  1.0000000 0.94117647  1.0000000  0.9333333  0.9375000 1.00000000
## [2,]  0.9423077 0.89830508  0.8695652  0.8809524  0.8571429 0.86407767
## [3,]  0.0000000 0.02702703  0.0000000  0.0000000  0.0000000 0.04109589
## [4,]  0.8979592 0.93650794  0.8730159  0.9868421  0.9622642 0.93965517
## [5,]  0.6875000 0.93333333  1.0000000  0.9333333  0.8333333 0.80000000
## [6,]  0.9259259 0.97916667  0.9545455  0.9843750  0.9250000 0.91666667
##      iMeLC_rep3 iMeLC_rep4 hPGCLC_rep1 hPGCLC_rep2 hPGCLC_rep3 hEGCLC_rep1
## [1,]  0.8571429  0.9200000  0.82352941   1.0000000   0.8461538  0.97619048
## [2,]  0.8769231  0.9014085  0.88000000   0.8648649   0.9245283  0.87254902
## [3,]  0.0000000  0.0000000  0.04081633   0.0000000   0.0200000  0.01190476
## [4,]  0.9843750  0.9433962  0.90476190   0.9714286   0.9655172  0.88461538
## [5,]  0.9000000        NaN         NaN   0.5294118   0.2000000  0.78947368
## [6,]  0.9836066  0.8909091  0.91176471   0.8181818   0.9038462  0.81707317
##      hEGCLC_rep2 hEGCLC_rep3
## [1,]  0.79411765   0.8461538
## [2,]  0.82142857   0.8791209
## [3,]  0.01666667   0.0000000
## [4,]  0.89380531   0.9099099
## [5,]  0.84210526         NaN
## [6,]  0.92452830   0.9154930

Second set of randomly selected non-escapee CpG methylation distribution plot

plotEmpiricalDistribution(random_sets[[2]], 
                          bySample = TRUE,
                          testCovariate = "Type",
                          adj = 3) + guides(linetype="none") + 
  labs(title = "Non-escapee CpG methylation distribution by cell type (2)") + ggplot2::scale_color_manual(values = cellTypes_colors)

3. Kolmogorov-Smirnov test

Example of KS test result comparing distributions in case of hEGCLC_rep1 sample: escapee CpG methylation levels vs first randomly selected non-escapee set.

ks_test_result <- ks.test(getMeth(bsseq_obj_escapees, type = "raw")[, "hEGCLC_rep1"], getMeth(random_sets[[1]], type = "raw")[, "hEGCLC_rep1"])
print(ks_test_result)
## 
##  Asymptotic two-sample Kolmogorov-Smirnov test
## 
## data:  getMeth(bsseq_obj_escapees, type = "raw")[, "hEGCLC_rep1"] and getMeth(random_sets[[1]], type = "raw")[, "hEGCLC_rep1"]
## D = 0.49116, p-value < 2.2e-16
## alternative hypothesis: two-sided

Example of KS test result comparing distributions in case of hEGCLC_rep1 sample: first randomly selected non-escapee set vs second randomly selected non-escapee set.

ks_test_result <- ks.test(getMeth(random_sets[[1]], type = "raw")[, "hEGCLC_rep1"], getMeth(random_sets[[2]], type = "raw")[, "hEGCLC_rep1"])
print(ks_test_result)
## 
##  Asymptotic two-sample Kolmogorov-Smirnov test
## 
## data:  getMeth(random_sets[[1]], type = "raw")[, "hEGCLC_rep1"] and getMeth(random_sets[[2]], type = "raw")[, "hEGCLC_rep1"]
## D = 0.016899, p-value = 0.1481
## alternative hypothesis: two-sided

Example of KS test result comparing distributions: escapee CpG methylation levels (hiPSC_rep2 sample) vs first randomly selected non-escapee set (hPGCLC_rep2 sample).

ks_test_result <- ks.test(getMeth(random_sets[[1]], type = "raw")[, "hiPSC_rep2"], getMeth(random_sets[[1]], type = "raw")[, "hPGCLC_rep2"])
print(ks_test_result)
## 
##  Asymptotic two-sample Kolmogorov-Smirnov test
## 
## data:  getMeth(random_sets[[1]], type = "raw")[, "hiPSC_rep2"] and getMeth(random_sets[[1]], type = "raw")[, "hPGCLC_rep2"]
## D = 0.12607, p-value < 2.2e-16
## alternative hypothesis: two-sided

Example of KS test result comparing distributions: escapee CpG methylation levels vs first randomly selected non-escapee set considering the mean levels across cell types (rowwise).

ks_test_result <- ks.test(getMeth(bsseq_obj_escapees, type = "raw") %>% rowMeans(na.rm = TRUE), getMeth(random_sets[[1]], type = "raw") %>% rowMeans(na.rm = TRUE))
print(ks_test_result)
## 
##  Asymptotic two-sample Kolmogorov-Smirnov test
## 
## data:  getMeth(bsseq_obj_escapees, type = "raw") %>% rowMeans(na.rm = TRUE) and getMeth(random_sets[[1]], type = "raw") %>% rowMeans(na.rm = TRUE)
## D = 0.50247, p-value < 2.2e-16
## alternative hypothesis: two-sided

Example of KS test result comparing distributions: first randomly selected non-escapee set vs second randomly selected non-escapee set considering the mean levels across cell types (rowwise).

ks_test_result <- ks.test(getMeth(random_sets[[1]], type = "raw") %>% rowMeans(na.rm = TRUE), getMeth(random_sets[[2]], type = "raw") %>% rowMeans(na.rm = TRUE))
print(ks_test_result)
## 
##  Asymptotic two-sample Kolmogorov-Smirnov test
## 
## data:  getMeth(random_sets[[1]], type = "raw") %>% rowMeans(na.rm = TRUE) and getMeth(random_sets[[2]], type = "raw") %>% rowMeans(na.rm = TRUE)
## D = 0.020849, p-value = 0.03807
## alternative hypothesis: two-sided

Let’s calculate the mean methylation levels across cell types in all sets of randomly selected non-escapee CpGs.

random_MeanMethLevels <- lapply(random_sets, function(bsseq){
  getMeth(bsseq, type = "raw") %>% rowMeans(na.rm = TRUE)})

Let’s do KS test comparing escapee CpG methylation levels vs all 500 randomly selected non-escapee sets.

ks_test_results <- lapply(random_MeanMethLevels, function(random_MeanMeth) {
  ks.test(getMeth(bsseq_obj_escapees, type = "raw") %>% rowMeans(na.rm = TRUE), random_MeanMeth)
})
pvalues_kstest <- sapply(ks_test_results, function(result) {
  result$p.value
})
adjusted_pvalues_kstest <- p.adjust(pvalues_kstest, method = "fdr")
print(adjusted_pvalues_kstest)
##   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [223] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

How many adjusted p-values are < 0.05?

table(adjusted_pvalues_kstest < 0.05)
## 
## TRUE 
##  500

Let’s do KS test comparing first randomly selected non-escapee set vs all 500 randomly selected non-escapee sets.

ks_test_results <- lapply(random_MeanMethLevels, function(random_MeanMeth) {
  ks.test(random_MeanMethLevels[[1]], random_MeanMeth)
})
pvalues_kstest <- sapply(ks_test_results, function(result) {
  result$p.value
})
adjusted_pvalues_kstest <- p.adjust(pvalues_kstest, method = "fdr")
print(adjusted_pvalues_kstest)
##   [1] 1.0000000 0.3518471 0.3847888 0.4303199 0.5205655 0.4407191 0.4442382
##   [8] 0.4407191 0.7503240 0.5407596 0.4407191 0.6083537 0.4375700 0.3422134
##  [15] 0.6768386 0.3422134 0.3578715 0.4755173 0.5488259 0.4438323 0.6736389
##  [22] 0.3847888 0.3353127 0.4585010 0.4159077 0.4159077 0.4303199 0.3422134
##  [29] 0.5148881 0.6915562 0.4271400 0.6867604 0.3578715 0.6083537 0.4442382
##  [36] 0.4823529 0.4407191 0.5381415 0.7503240 0.3432536 0.7134610 0.8199062
##  [43] 0.6915562 0.5999931 0.3518471 0.8720074 0.5148881 0.4407191 0.6318439
##  [50] 0.4442382 0.4303199 0.3353127 0.3818911 0.4303199 0.5286105 0.6385701
##  [57] 0.8491772 0.3422134 0.5074578 0.7816998 0.4823529 0.5286105 0.3706061
##  [64] 0.5286105 0.3422134 0.3518471 0.4459734 0.5585965 0.4823529 0.4194335
##  [71] 0.4066980 0.6867604 0.3012774 0.8807709 0.8491772 0.7261750 0.4755173
##  [78] 0.9930061 0.5407596 0.5381415 0.5900274 0.9049578 0.4856585 0.3432536
##  [85] 0.3706966 0.5766692 0.7503240 0.3578715 0.8000823 0.4303199 0.7497694
##  [92] 0.4271400 0.4585010 0.3899973 0.3578715 0.3847888 0.6318439 0.8427266
##  [99] 0.3899973 0.6768386 0.3606856 0.4066980 0.3432536 0.4541645 0.5407596
## [106] 0.3012774 0.4442382 0.9049578 0.4194335 0.6318439 0.4194335 0.7089329
## [113] 0.6768386 0.6469239 0.4442382 0.4823529 0.8647174 0.4442382 0.7713814
## [120] 0.7883774 0.5849429 0.3422134 0.5074578 0.5900274 0.4438323 0.8427266
## [127] 0.8028482 0.5585965 0.3012774 0.8892022 0.3637692 0.4038396 0.3518471
## [134] 0.3847888 0.4585010 0.5286105 0.5381415 0.4856585 0.5667758 0.4459734
## [141] 0.5849429 0.4541645 0.5849682 0.3422134 0.3422134 0.3778936 0.3012774
## [148] 0.3637692 0.3847888 0.5407596 0.5766692 0.5900274 0.3578715 0.5148881
## [155] 0.3706966 0.4856585 0.4541645 0.3847888 0.4755173 0.4823529 0.4159077
## [162] 0.6768386 0.6284269 0.5001237 0.8359821 0.5310566 0.5205655 0.4823529
## [169] 0.7089329 0.8720074 0.3578715 0.4823529 0.3422134 0.9013164 0.6915562
## [176] 0.5849682 0.9013164 0.3518471 0.4856585 0.3012774 0.6083537 0.5286105
## [183] 0.3518471 0.4438323 0.5849682 0.3578715 0.4303199 0.5849682 0.3847888
## [190] 0.3847888 0.3353127 0.5407596 0.4442382 0.8028482 0.6979796 0.4159077
## [197] 0.3578715 0.8588919 0.5849682 0.4459734 0.8359821 0.7089329 0.3422134
## [204] 0.3578715 0.7503240 0.6469239 0.3518471 0.6979796 0.5310566 0.3422134
## [211] 0.6915562 0.7503240 0.7883774 0.4303199 0.5849429 0.7060067 0.5148881
## [218] 0.4442382 0.6768386 0.5407596 0.5900274 0.4303199 0.4499864 0.4271400
## [225] 0.7060067 0.6469239 0.8028482 0.4303199 0.5381415 0.4585010 0.5407596
## [232] 0.6469239 0.3353127 0.3518471 0.5381415 0.5407596 0.9013164 0.5849682
## [239] 0.7816998 0.8359821 0.7134610 0.3578715 0.5310566 0.7503240 0.5205655
## [246] 0.3578715 0.5001237 0.6385701 0.4012447 0.6385701 0.4442382 0.3818911
## [253] 0.3518471 0.4823529 0.8491772 0.4823529 0.4159077 0.3012774 0.4438323
## [260] 0.3518471 0.3518471 0.4823529 0.5849682 0.9930061 0.5407596 0.6979796
## [267] 0.3518471 0.3518471 0.5381415 0.3518471 0.4823529 0.3518471 0.8427266
## [274] 0.4823529 0.4928061 0.3422134 0.3432536 0.8199062 0.3847888 0.6318439
## [281] 0.7713814 0.5407596 0.3518471 0.3606856 0.5488259 0.4375700 0.4159077
## [288] 0.5381415 0.7713814 0.3518471 0.6318439 0.8199062 0.5286105 0.4823529
## [295] 0.3847888 0.5667758 0.5286105 0.4303199 0.3422134 0.4038396 0.5205655
## [302] 0.4499864 0.4823529 0.8647174 0.8028482 0.7388558 0.6915562 0.5407596
## [309] 0.3706966 0.5310566 0.3422134 0.4303199 0.5667758 0.5667758 0.8359821
## [316] 0.9013164 0.6083537 0.4459734 0.3578715 0.5407596 0.4442382 0.3847888
## [323] 0.3422134 0.3422134 0.4271400 0.3353127 0.8647174 0.9506226 0.5900274
## [330] 0.4066980 0.6736389 0.3422134 0.5381415 0.5407596 0.4442382 0.3028498
## [337] 0.4928061 0.4271400 0.6167284 0.4407191 0.6318439 0.3518471 0.5849682
## [344] 0.5381415 0.4442382 0.5999931 0.7089329 0.4669470 0.3518471 0.4856585
## [351] 0.8028482 0.5286105 0.7503240 0.4499864 0.3637692 0.3422134 0.5074578
## [358] 0.3578715 0.3012774 0.5286105 0.7089329 0.4459734 0.4823529 0.4303199
## [365] 0.3578715 0.3578715 0.3422134 0.3847888 0.4438323 0.7713814 0.5849429
## [372] 0.6167284 0.4669470 0.7883774 0.5488259 0.4499864 0.3518471 0.7134610
## [379] 0.4928061 0.3578715 0.9506226 0.3012774 0.8123670 0.5001237 0.3222582
## [386] 0.5310566 0.3847888 0.4407191 0.4459734 0.4823529 0.5074578 0.4499864
## [393] 0.3518471 0.6979796 0.4271400 0.5381415 0.3606856 0.6768386 0.7134610
## [400] 0.3422134 0.7883774 0.4066980 0.5849682 0.4407191 0.4271400 0.3578715
## [407] 0.4541645 0.3422134 0.6385701 0.7060067 0.6167284 0.3422134 0.4442382
## [414] 0.7134610 0.7497694 0.4407191 0.3578715 0.7089329 0.4585010 0.3422134
## [421] 0.6915562 0.3778936 0.3706966 0.3847888 0.3012774 0.3422134 0.4038396
## [428] 0.5286105 0.5310566 0.4928061 0.6979796 0.4303199 0.3422134 0.4823529
## [435] 0.6318439 0.4303199 0.4375700 0.4303199 0.5286105 0.3578715 0.7134610
## [442] 0.4541645 0.5381415 0.3899973 0.3518471 0.5407596 0.3578715 0.6867604
## [449] 0.6385701 0.8028482 0.6284269 0.4823529 0.6167284 0.5205655 0.4038396
## [456] 0.5407596 0.4271400 0.3422134 0.3012774 0.5900274 0.5381415 0.8359821
## [463] 0.5488259 0.9359103 0.5999931 0.5766692 0.3847888 0.3422134 0.4669470
## [470] 0.5381415 0.9480527 0.4856585 0.3847888 0.7060067 0.4823529 0.6768386
## [477] 0.3518471 0.3578715 0.3518471 0.4159077 0.4303199 0.4407191 0.3818911
## [484] 0.3422134 0.3422134 0.4194335 0.3847888 0.5407596 0.5001237 0.5310566
## [491] 0.8123670 0.5585965 0.3578715 0.7089329 0.9049578 0.3422134 0.4407191
## [498] 0.3706061 0.5286105 0.7503240

How many adjusted p-values are < 0.05?

table(adjusted_pvalues_kstest < 0.05)
## 
## FALSE 
##   500

4.Escapee and non escapee CpG methylation distribution plot

MeanMethLevels_df <- c(unlist(random_MeanMethLevels), getMeth(bsseq_obj_escapees, type = "raw") %>% rowMeans(na.rm = TRUE) %>% as.vector()) %>% as.data.frame()
colnames(MeanMethLevels_df) <- "MeanMeth"
MeanMethLevels_df$Distribution <- c(paste0("Random_", rep(1:num_permutations, each = dim(bsseq_obj_escapees)[1])), rep("Escapees_CpGs", each = dim(bsseq_obj_escapees)[1]))
MeanMethLevels_df$Category <- "Non Escapees"
MeanMethLevels_df$Category[(nrow(MeanMethLevels_df)+1-nrow(bsseq_obj_escapees)):nrow(MeanMethLevels_df)] <- "Escapees"
ggplot(MeanMethLevels_df, aes(x = MeanMeth, color = Category, group = Distribution)) +
  geom_density(alpha = 0.5)  +
  labs(
    title = "Distribution CpG mean methylation levels in escapees and non-escapees regions", 
    x = "Mean Methylation Levels across cell types", 
    y = "Density", 
    color = "Category"
  ) +
  scale_color_manual(values = c("Escapees" = "#FF7D00", "Non Escapees" = "#15616D")) + 
  theme_minimal()

5.Session Info

date()
## [1] "Tue Jan 28 16:52:36 2025"
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] tidyr_1.3.0                 ggplot2_3.4.2              
##  [3] dmrseq_1.18.1               bsseq_1.34.0               
##  [5] SummarizedExperiment_1.28.0 Biobase_2.58.0             
##  [7] MatrixGenerics_1.10.0       matrixStats_1.0.0          
##  [9] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [11] IRanges_2.32.0              S4Vectors_0.36.2           
## [13] BiocGenerics_0.44.0         dplyr_1.1.2                
## [15] plyr_1.8.8                 
## 
## loaded via a namespace (and not attached):
##   [1] AnnotationHub_3.6.0           BiocFileCache_2.6.1          
##   [3] splines_4.2.1                 BiocParallel_1.32.6          
##   [5] digest_0.6.33                 foreach_1.5.2                
##   [7] htmltools_0.5.5               fansi_1.0.4                  
##   [9] magrittr_2.0.3                memoise_2.0.1                
##  [11] BSgenome_1.66.3               tzdb_0.4.0                   
##  [13] limma_3.54.2                  Biostrings_2.66.0            
##  [15] readr_2.1.4                   R.utils_2.12.2               
##  [17] prettyunits_1.1.1             colorspace_2.1-0             
##  [19] blob_1.2.4                    rappdirs_0.3.3               
##  [21] xfun_0.39                     crayon_1.5.2                 
##  [23] RCurl_1.98-1.12               jsonlite_1.8.7               
##  [25] annotatr_1.24.0               iterators_1.0.14             
##  [27] glue_1.6.2                    gtable_0.3.3                 
##  [29] zlibbioc_1.44.0               XVector_0.38.0               
##  [31] DelayedArray_0.24.0           Rhdf5lib_1.20.0              
##  [33] HDF5Array_1.26.0              scales_1.2.1                 
##  [35] DBI_1.1.3                     rngtools_1.5.2               
##  [37] Rcpp_1.0.11                   xtable_1.8-4                 
##  [39] progress_1.2.2                bumphunter_1.40.0            
##  [41] bit_4.0.5                     httr_1.4.6                   
##  [43] RColorBrewer_1.1-3            ellipsis_0.3.2               
##  [45] pkgconfig_2.0.3               XML_3.99-0.14                
##  [47] R.methodsS3_1.8.2             farver_2.1.1                 
##  [49] sass_0.4.7                    dbplyr_2.3.3                 
##  [51] locfit_1.5-9.7                utf8_1.2.3                   
##  [53] tidyselect_1.2.0              labeling_0.4.2               
##  [55] rlang_1.1.1                   reshape2_1.4.4               
##  [57] later_1.3.1                   AnnotationDbi_1.60.2         
##  [59] munsell_0.5.0                 BiocVersion_3.16.0           
##  [61] tools_4.2.1                   cachem_1.0.8                 
##  [63] cli_3.6.1                     generics_0.1.3               
##  [65] RSQLite_2.3.1                 evaluate_0.21                
##  [67] stringr_1.5.0                 fastmap_1.1.1                
##  [69] yaml_2.3.7                    outliers_0.15                
##  [71] knitr_1.43                    bit64_4.0.5                  
##  [73] purrr_1.0.1                   KEGGREST_1.38.0              
##  [75] nlme_3.1-162                  doRNG_1.8.6                  
##  [77] sparseMatrixStats_1.10.0      mime_0.12                    
##  [79] R.oo_1.25.0                   xml2_1.3.5                   
##  [81] biomaRt_2.54.1                compiler_4.2.1               
##  [83] rstudioapi_0.15.0             filelock_1.0.2               
##  [85] curl_5.0.1                    png_0.1-8                    
##  [87] interactiveDisplayBase_1.36.0 tibble_3.2.1                 
##  [89] bslib_0.5.0                   stringi_1.7.12               
##  [91] highr_0.10                    GenomicFeatures_1.50.4       
##  [93] lattice_0.21-8                Matrix_1.6-0                 
##  [95] permute_0.9-7                 vctrs_0.6.3                  
##  [97] pillar_1.9.0                  lifecycle_1.0.3              
##  [99] rhdf5filters_1.10.1           BiocManager_1.30.20          
## [101] jquerylib_0.1.4               data.table_1.14.8            
## [103] bitops_1.0-7                  httpuv_1.6.11                
## [105] rtracklayer_1.58.0            R6_2.5.1                     
## [107] BiocIO_1.8.0                  promises_1.2.0.1             
## [109] codetools_0.2-19              gtools_3.9.4                 
## [111] rhdf5_2.42.1                  rjson_0.2.21                 
## [113] withr_2.5.0                   regioneR_1.30.0              
## [115] GenomicAlignments_1.34.1      Rsamtools_2.14.0             
## [117] GenomeInfoDbData_1.2.9        parallel_4.2.1               
## [119] hms_1.1.3                     grid_4.2.1                   
## [121] rmarkdown_2.23                DelayedMatrixStats_1.20.0    
## [123] shiny_1.7.4.1                 restfulr_0.0.15