library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(bsseq)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:plyr':
##
## rename
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:plyr':
##
## desc
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
## The following object is masked from 'package:plyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(dmrseq)
library(ggplot2)
source("../Methylation_helper.R")
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
##
## expand
Set random seed and number of permutations
set.seed(123)
<- params$num_permutations num_permutations
Loading bsseq objects
<- readRDS("~/DataDir/1.PreliminaryAnalysis/Output/WithSelectedEGCLCs/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed
bsseq_obj <- readRDS(paste0(params$InputFolder, "bsseq_obj_escapees.rds"))
bsseq_obj_escapees <- readRDS(paste0(params$InputFolder, "bsseq_obj_NonEscapees.rds")) bsseq_obj_NonEscapees
Colors for figures
<- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00") cellTypes_colors
<- do_PCA(getMeth(bsseq_obj_escapees, type = "raw"))
pca_res <- bsseq::pData(bsseq_obj_escapees)
sample_anno plot_PCA(pca_res = pca_res, anno = sample_anno, col_anno = "Type", shape_anno = NULL, custom_colors = cellTypes_colors, point_size = 5)
<- do_PCA(getMeth(bsseq_obj_NonEscapees, type = "raw"))
pca_res <- bsseq::pData(bsseq_obj_NonEscapees)
sample_anno plot_PCA(pca_res = pca_res, anno = sample_anno, col_anno = "Type", shape_anno = NULL, custom_colors = cellTypes_colors, point_size = 5)
As expected it resembles the PCA done on the complete one, since we removed only 9113 out of a total of 3680616 CpG loci.
Let’s generate random indices to randomly select 9113 CpG loci among the non-escapee ones.
<- list()
random_sets
for (i in 1:num_permutations) {
<- sample(1:dim(bsseq_obj_NonEscapees)[1], dim(bsseq_obj_escapees)[1], replace = FALSE)
random_indices <- bsseq_obj_NonEscapees[random_indices, ]
bsseq_obj_randomNonEscapees
<- bsseq_obj_randomNonEscapees
random_sets[[i]] }
Head of BSSEQ with escapee CpGs
getMeth(bsseq_obj_escapees, type = "raw") %>% head()
## hiPSC_rep1 hiPSC_rep2 hiPSC_rep3 hiPSC_rep4 iMeLC_rep1 iMeLC_rep2
## [1,] 0.9545455 0.9473684 0.9482759 0.9821429 0.9767442 0.9813084
## [2,] 1.0000000 0.9803922 0.9615385 0.9800000 0.9714286 0.9622642
## [3,] 0.8684211 0.9298246 0.7313433 0.8985507 0.9090909 0.7758621
## [4,] 0.8888889 0.9166667 0.8666667 0.9090909 0.9600000 0.8198198
## [5,] 0.7500000 0.8431373 0.8163265 0.7083333 0.8372093 0.8461538
## [6,] 0.8000000 0.8095238 0.8260870 0.8260870 0.8750000 0.6551724
## iMeLC_rep3 iMeLC_rep4 hPGCLC_rep1 hPGCLC_rep2 hPGCLC_rep3 hEGCLC_rep1
## [1,] 0.8909091 0.9534884 0.9062500 1.0000000 0.9428571 0.9230769
## [2,] 0.9230769 0.9032258 0.9600000 0.9545455 0.9677419 0.9850746
## [3,] 0.7608696 0.8666667 0.8108108 0.6969697 0.7948718 0.8426966
## [4,] 0.8297872 0.8666667 0.8437500 0.7741935 0.8837209 0.8571429
## [5,] 0.7894737 0.7333333 0.7307692 0.7272727 0.8378378 0.7333333
## [6,] 0.9000000 0.8500000 0.8750000 0.8947368 0.6944444 0.7812500
## hEGCLC_rep2 hEGCLC_rep3
## [1,] 0.9081633 0.9318182
## [2,] 0.9484536 0.9594595
## [3,] 0.8750000 0.8400000
## [4,] 0.8450704 0.8311688
## [5,] 0.6949153 0.8730159
## [6,] 0.7692308 0.8888889
Escapee CpG methylation distribution plot
plotEmpiricalDistribution(bsseq_obj_escapees,
bySample = TRUE,
testCovariate = "Type",
adj = 3) + guides(linetype="none") +
labs(title = "Escapee CpG methylation distribution by cell type") + ggplot2::scale_color_manual(values = cellTypes_colors)
plotEmpiricalDistribution(bsseq_obj_escapees,
bySample = FALSE,
testCovariate = "Line",
adj = 3) + guides(linetype="none") +
labs(title = "Escapee CpG methylation distribution collapsing cell types")
Non-Escapee CpG methylation distribution plot
plotEmpiricalDistribution(bsseq_obj_NonEscapees,
bySample = TRUE,
testCovariate = "Type",
adj = 3) + guides(linetype="none") +
labs(title = "Non-escapee CpG methylation distribution by cell type") + ggplot2::scale_color_manual(values = cellTypes_colors)
plotEmpiricalDistribution(bsseq_obj_NonEscapees,
bySample = FALSE,
testCovariate = "Line",
adj = 3) + guides(linetype="none") +
labs(title = "Non-escapee CpG methylation distribution collapsing cell types")
Head of first BSSEQ with randomly selected non-escapee CpGs
getMeth(random_sets[[1]], type = "raw") %>% head()
## hiPSC_rep1 hiPSC_rep2 hiPSC_rep3 hiPSC_rep4 iMeLC_rep1 iMeLC_rep2
## [1,] 1.0000000 0.95454545 0.93548387 0.90909091 0.9130435 0.87804878
## [2,] 0.8823529 0.94444444 0.90476190 0.87500000 0.9000000 0.84848485
## [3,] 0.3055556 0.13333333 0.21428571 0.21212121 0.2558140 0.25316456
## [4,] 0.1794872 0.08196721 0.08163265 0.06382979 0.1052632 0.08860759
## [5,] 0.7586207 0.96551724 1.00000000 0.94117647 0.8846154 0.88679245
## [6,] 0.9843750 0.96666667 0.93877551 0.96212121 0.9753086 0.97972973
## iMeLC_rep3 iMeLC_rep4 hPGCLC_rep1 hPGCLC_rep2 hPGCLC_rep3 hEGCLC_rep1
## [1,] 0.9583333 1.0000000 0.88888889 0.9642857 0.9487179 0.9743590
## [2,] 0.8181818 0.7419355 0.86363636 0.9230769 0.9200000 0.8205128
## [3,] 0.1578947 0.1785714 0.20512821 0.1600000 0.1777778 0.2187500
## [4,] 0.1707317 0.1000000 0.02631579 0.1470588 0.1428571 0.1750000
## [5,] 0.8372093 0.7837838 0.83333333 0.7000000 0.6976744 0.9117647
## [6,] 1.0000000 0.9393939 0.96363636 0.9677419 0.9545455 0.9375000
## hEGCLC_rep2 hEGCLC_rep3
## [1,] 0.8965517 0.94444444
## [2,] 0.7000000 0.82926829
## [3,] 0.1971831 0.20652174
## [4,] 0.1923077 0.07042254
## [5,] 0.9411765 0.84210526
## [6,] 1.0000000 0.95833333
First set of randomly selected non-escapee CpG methylation distribution plot
plotEmpiricalDistribution(random_sets[[1]],
bySample = TRUE,
testCovariate = "Type",
adj = 3) + guides(linetype="none") +
labs(title = "Non-escapee CpG methylation distribution by cell type (1)") + ggplot2::scale_color_manual(values = cellTypes_colors)
Head of second BSSEQ with randomly selected non-escapee CpGs
getMeth(random_sets[[2]], type = "raw") %>% head()
## hiPSC_rep1 hiPSC_rep2 hiPSC_rep3 hiPSC_rep4 iMeLC_rep1 iMeLC_rep2
## [1,] 1.0000000 0.94117647 1.0000000 0.9333333 0.9375000 1.00000000
## [2,] 0.9423077 0.89830508 0.8695652 0.8809524 0.8571429 0.86407767
## [3,] 0.0000000 0.02702703 0.0000000 0.0000000 0.0000000 0.04109589
## [4,] 0.8979592 0.93650794 0.8730159 0.9868421 0.9622642 0.93965517
## [5,] 0.6875000 0.93333333 1.0000000 0.9333333 0.8333333 0.80000000
## [6,] 0.9259259 0.97916667 0.9545455 0.9843750 0.9250000 0.91666667
## iMeLC_rep3 iMeLC_rep4 hPGCLC_rep1 hPGCLC_rep2 hPGCLC_rep3 hEGCLC_rep1
## [1,] 0.8571429 0.9200000 0.82352941 1.0000000 0.8461538 0.97619048
## [2,] 0.8769231 0.9014085 0.88000000 0.8648649 0.9245283 0.87254902
## [3,] 0.0000000 0.0000000 0.04081633 0.0000000 0.0200000 0.01190476
## [4,] 0.9843750 0.9433962 0.90476190 0.9714286 0.9655172 0.88461538
## [5,] 0.9000000 NaN NaN 0.5294118 0.2000000 0.78947368
## [6,] 0.9836066 0.8909091 0.91176471 0.8181818 0.9038462 0.81707317
## hEGCLC_rep2 hEGCLC_rep3
## [1,] 0.79411765 0.8461538
## [2,] 0.82142857 0.8791209
## [3,] 0.01666667 0.0000000
## [4,] 0.89380531 0.9099099
## [5,] 0.84210526 NaN
## [6,] 0.92452830 0.9154930
Second set of randomly selected non-escapee CpG methylation distribution plot
plotEmpiricalDistribution(random_sets[[2]],
bySample = TRUE,
testCovariate = "Type",
adj = 3) + guides(linetype="none") +
labs(title = "Non-escapee CpG methylation distribution by cell type (2)") + ggplot2::scale_color_manual(values = cellTypes_colors)
Example of KS test result comparing distributions in case of hEGCLC_rep1 sample: escapee CpG methylation levels vs first randomly selected non-escapee set.
<- ks.test(getMeth(bsseq_obj_escapees, type = "raw")[, "hEGCLC_rep1"], getMeth(random_sets[[1]], type = "raw")[, "hEGCLC_rep1"])
ks_test_result print(ks_test_result)
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: getMeth(bsseq_obj_escapees, type = "raw")[, "hEGCLC_rep1"] and getMeth(random_sets[[1]], type = "raw")[, "hEGCLC_rep1"]
## D = 0.49116, p-value < 2.2e-16
## alternative hypothesis: two-sided
Example of KS test result comparing distributions in case of hEGCLC_rep1 sample: first randomly selected non-escapee set vs second randomly selected non-escapee set.
<- ks.test(getMeth(random_sets[[1]], type = "raw")[, "hEGCLC_rep1"], getMeth(random_sets[[2]], type = "raw")[, "hEGCLC_rep1"])
ks_test_result print(ks_test_result)
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: getMeth(random_sets[[1]], type = "raw")[, "hEGCLC_rep1"] and getMeth(random_sets[[2]], type = "raw")[, "hEGCLC_rep1"]
## D = 0.016899, p-value = 0.1481
## alternative hypothesis: two-sided
Example of KS test result comparing distributions: escapee CpG methylation levels (hiPSC_rep2 sample) vs first randomly selected non-escapee set (hPGCLC_rep2 sample).
<- ks.test(getMeth(random_sets[[1]], type = "raw")[, "hiPSC_rep2"], getMeth(random_sets[[1]], type = "raw")[, "hPGCLC_rep2"])
ks_test_result print(ks_test_result)
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: getMeth(random_sets[[1]], type = "raw")[, "hiPSC_rep2"] and getMeth(random_sets[[1]], type = "raw")[, "hPGCLC_rep2"]
## D = 0.12607, p-value < 2.2e-16
## alternative hypothesis: two-sided
Example of KS test result comparing distributions: escapee CpG methylation levels vs first randomly selected non-escapee set considering the mean levels across cell types (rowwise).
<- ks.test(getMeth(bsseq_obj_escapees, type = "raw") %>% rowMeans(na.rm = TRUE), getMeth(random_sets[[1]], type = "raw") %>% rowMeans(na.rm = TRUE))
ks_test_result print(ks_test_result)
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: getMeth(bsseq_obj_escapees, type = "raw") %>% rowMeans(na.rm = TRUE) and getMeth(random_sets[[1]], type = "raw") %>% rowMeans(na.rm = TRUE)
## D = 0.50247, p-value < 2.2e-16
## alternative hypothesis: two-sided
Example of KS test result comparing distributions: first randomly selected non-escapee set vs second randomly selected non-escapee set considering the mean levels across cell types (rowwise).
<- ks.test(getMeth(random_sets[[1]], type = "raw") %>% rowMeans(na.rm = TRUE), getMeth(random_sets[[2]], type = "raw") %>% rowMeans(na.rm = TRUE))
ks_test_result print(ks_test_result)
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: getMeth(random_sets[[1]], type = "raw") %>% rowMeans(na.rm = TRUE) and getMeth(random_sets[[2]], type = "raw") %>% rowMeans(na.rm = TRUE)
## D = 0.020849, p-value = 0.03807
## alternative hypothesis: two-sided
Let’s calculate the mean methylation levels across cell types in all sets of randomly selected non-escapee CpGs.
<- lapply(random_sets, function(bsseq){
random_MeanMethLevels getMeth(bsseq, type = "raw") %>% rowMeans(na.rm = TRUE)})
Let’s do KS test comparing escapee CpG methylation levels vs all 500 randomly selected non-escapee sets.
<- lapply(random_MeanMethLevels, function(random_MeanMeth) {
ks_test_results ks.test(getMeth(bsseq_obj_escapees, type = "raw") %>% rowMeans(na.rm = TRUE), random_MeanMeth)
})<- sapply(ks_test_results, function(result) {
pvalues_kstest $p.value
result
})<- p.adjust(pvalues_kstest, method = "fdr")
adjusted_pvalues_kstest print(adjusted_pvalues_kstest)
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [223] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
How many adjusted p-values are < 0.05?
table(adjusted_pvalues_kstest < 0.05)
##
## TRUE
## 500
Let’s do KS test comparing first randomly selected non-escapee set vs all 500 randomly selected non-escapee sets.
<- lapply(random_MeanMethLevels, function(random_MeanMeth) {
ks_test_results ks.test(random_MeanMethLevels[[1]], random_MeanMeth)
})<- sapply(ks_test_results, function(result) {
pvalues_kstest $p.value
result
})<- p.adjust(pvalues_kstest, method = "fdr")
adjusted_pvalues_kstest print(adjusted_pvalues_kstest)
## [1] 1.0000000 0.3518471 0.3847888 0.4303199 0.5205655 0.4407191 0.4442382
## [8] 0.4407191 0.7503240 0.5407596 0.4407191 0.6083537 0.4375700 0.3422134
## [15] 0.6768386 0.3422134 0.3578715 0.4755173 0.5488259 0.4438323 0.6736389
## [22] 0.3847888 0.3353127 0.4585010 0.4159077 0.4159077 0.4303199 0.3422134
## [29] 0.5148881 0.6915562 0.4271400 0.6867604 0.3578715 0.6083537 0.4442382
## [36] 0.4823529 0.4407191 0.5381415 0.7503240 0.3432536 0.7134610 0.8199062
## [43] 0.6915562 0.5999931 0.3518471 0.8720074 0.5148881 0.4407191 0.6318439
## [50] 0.4442382 0.4303199 0.3353127 0.3818911 0.4303199 0.5286105 0.6385701
## [57] 0.8491772 0.3422134 0.5074578 0.7816998 0.4823529 0.5286105 0.3706061
## [64] 0.5286105 0.3422134 0.3518471 0.4459734 0.5585965 0.4823529 0.4194335
## [71] 0.4066980 0.6867604 0.3012774 0.8807709 0.8491772 0.7261750 0.4755173
## [78] 0.9930061 0.5407596 0.5381415 0.5900274 0.9049578 0.4856585 0.3432536
## [85] 0.3706966 0.5766692 0.7503240 0.3578715 0.8000823 0.4303199 0.7497694
## [92] 0.4271400 0.4585010 0.3899973 0.3578715 0.3847888 0.6318439 0.8427266
## [99] 0.3899973 0.6768386 0.3606856 0.4066980 0.3432536 0.4541645 0.5407596
## [106] 0.3012774 0.4442382 0.9049578 0.4194335 0.6318439 0.4194335 0.7089329
## [113] 0.6768386 0.6469239 0.4442382 0.4823529 0.8647174 0.4442382 0.7713814
## [120] 0.7883774 0.5849429 0.3422134 0.5074578 0.5900274 0.4438323 0.8427266
## [127] 0.8028482 0.5585965 0.3012774 0.8892022 0.3637692 0.4038396 0.3518471
## [134] 0.3847888 0.4585010 0.5286105 0.5381415 0.4856585 0.5667758 0.4459734
## [141] 0.5849429 0.4541645 0.5849682 0.3422134 0.3422134 0.3778936 0.3012774
## [148] 0.3637692 0.3847888 0.5407596 0.5766692 0.5900274 0.3578715 0.5148881
## [155] 0.3706966 0.4856585 0.4541645 0.3847888 0.4755173 0.4823529 0.4159077
## [162] 0.6768386 0.6284269 0.5001237 0.8359821 0.5310566 0.5205655 0.4823529
## [169] 0.7089329 0.8720074 0.3578715 0.4823529 0.3422134 0.9013164 0.6915562
## [176] 0.5849682 0.9013164 0.3518471 0.4856585 0.3012774 0.6083537 0.5286105
## [183] 0.3518471 0.4438323 0.5849682 0.3578715 0.4303199 0.5849682 0.3847888
## [190] 0.3847888 0.3353127 0.5407596 0.4442382 0.8028482 0.6979796 0.4159077
## [197] 0.3578715 0.8588919 0.5849682 0.4459734 0.8359821 0.7089329 0.3422134
## [204] 0.3578715 0.7503240 0.6469239 0.3518471 0.6979796 0.5310566 0.3422134
## [211] 0.6915562 0.7503240 0.7883774 0.4303199 0.5849429 0.7060067 0.5148881
## [218] 0.4442382 0.6768386 0.5407596 0.5900274 0.4303199 0.4499864 0.4271400
## [225] 0.7060067 0.6469239 0.8028482 0.4303199 0.5381415 0.4585010 0.5407596
## [232] 0.6469239 0.3353127 0.3518471 0.5381415 0.5407596 0.9013164 0.5849682
## [239] 0.7816998 0.8359821 0.7134610 0.3578715 0.5310566 0.7503240 0.5205655
## [246] 0.3578715 0.5001237 0.6385701 0.4012447 0.6385701 0.4442382 0.3818911
## [253] 0.3518471 0.4823529 0.8491772 0.4823529 0.4159077 0.3012774 0.4438323
## [260] 0.3518471 0.3518471 0.4823529 0.5849682 0.9930061 0.5407596 0.6979796
## [267] 0.3518471 0.3518471 0.5381415 0.3518471 0.4823529 0.3518471 0.8427266
## [274] 0.4823529 0.4928061 0.3422134 0.3432536 0.8199062 0.3847888 0.6318439
## [281] 0.7713814 0.5407596 0.3518471 0.3606856 0.5488259 0.4375700 0.4159077
## [288] 0.5381415 0.7713814 0.3518471 0.6318439 0.8199062 0.5286105 0.4823529
## [295] 0.3847888 0.5667758 0.5286105 0.4303199 0.3422134 0.4038396 0.5205655
## [302] 0.4499864 0.4823529 0.8647174 0.8028482 0.7388558 0.6915562 0.5407596
## [309] 0.3706966 0.5310566 0.3422134 0.4303199 0.5667758 0.5667758 0.8359821
## [316] 0.9013164 0.6083537 0.4459734 0.3578715 0.5407596 0.4442382 0.3847888
## [323] 0.3422134 0.3422134 0.4271400 0.3353127 0.8647174 0.9506226 0.5900274
## [330] 0.4066980 0.6736389 0.3422134 0.5381415 0.5407596 0.4442382 0.3028498
## [337] 0.4928061 0.4271400 0.6167284 0.4407191 0.6318439 0.3518471 0.5849682
## [344] 0.5381415 0.4442382 0.5999931 0.7089329 0.4669470 0.3518471 0.4856585
## [351] 0.8028482 0.5286105 0.7503240 0.4499864 0.3637692 0.3422134 0.5074578
## [358] 0.3578715 0.3012774 0.5286105 0.7089329 0.4459734 0.4823529 0.4303199
## [365] 0.3578715 0.3578715 0.3422134 0.3847888 0.4438323 0.7713814 0.5849429
## [372] 0.6167284 0.4669470 0.7883774 0.5488259 0.4499864 0.3518471 0.7134610
## [379] 0.4928061 0.3578715 0.9506226 0.3012774 0.8123670 0.5001237 0.3222582
## [386] 0.5310566 0.3847888 0.4407191 0.4459734 0.4823529 0.5074578 0.4499864
## [393] 0.3518471 0.6979796 0.4271400 0.5381415 0.3606856 0.6768386 0.7134610
## [400] 0.3422134 0.7883774 0.4066980 0.5849682 0.4407191 0.4271400 0.3578715
## [407] 0.4541645 0.3422134 0.6385701 0.7060067 0.6167284 0.3422134 0.4442382
## [414] 0.7134610 0.7497694 0.4407191 0.3578715 0.7089329 0.4585010 0.3422134
## [421] 0.6915562 0.3778936 0.3706966 0.3847888 0.3012774 0.3422134 0.4038396
## [428] 0.5286105 0.5310566 0.4928061 0.6979796 0.4303199 0.3422134 0.4823529
## [435] 0.6318439 0.4303199 0.4375700 0.4303199 0.5286105 0.3578715 0.7134610
## [442] 0.4541645 0.5381415 0.3899973 0.3518471 0.5407596 0.3578715 0.6867604
## [449] 0.6385701 0.8028482 0.6284269 0.4823529 0.6167284 0.5205655 0.4038396
## [456] 0.5407596 0.4271400 0.3422134 0.3012774 0.5900274 0.5381415 0.8359821
## [463] 0.5488259 0.9359103 0.5999931 0.5766692 0.3847888 0.3422134 0.4669470
## [470] 0.5381415 0.9480527 0.4856585 0.3847888 0.7060067 0.4823529 0.6768386
## [477] 0.3518471 0.3578715 0.3518471 0.4159077 0.4303199 0.4407191 0.3818911
## [484] 0.3422134 0.3422134 0.4194335 0.3847888 0.5407596 0.5001237 0.5310566
## [491] 0.8123670 0.5585965 0.3578715 0.7089329 0.9049578 0.3422134 0.4407191
## [498] 0.3706061 0.5286105 0.7503240
How many adjusted p-values are < 0.05?
table(adjusted_pvalues_kstest < 0.05)
##
## FALSE
## 500
<- c(unlist(random_MeanMethLevels), getMeth(bsseq_obj_escapees, type = "raw") %>% rowMeans(na.rm = TRUE) %>% as.vector()) %>% as.data.frame()
MeanMethLevels_df colnames(MeanMethLevels_df) <- "MeanMeth"
$Distribution <- c(paste0("Random_", rep(1:num_permutations, each = dim(bsseq_obj_escapees)[1])), rep("Escapees_CpGs", each = dim(bsseq_obj_escapees)[1]))
MeanMethLevels_df$Category <- "Non Escapees"
MeanMethLevels_df$Category[(nrow(MeanMethLevels_df)+1-nrow(bsseq_obj_escapees)):nrow(MeanMethLevels_df)] <- "Escapees" MeanMethLevels_df
ggplot(MeanMethLevels_df, aes(x = MeanMeth, color = Category, group = Distribution)) +
geom_density(alpha = 0.5) +
labs(
title = "Distribution CpG mean methylation levels in escapees and non-escapees regions",
x = "Mean Methylation Levels across cell types",
y = "Density",
color = "Category"
+
) scale_color_manual(values = c("Escapees" = "#FF7D00", "Non Escapees" = "#15616D")) +
theme_minimal()
date()
## [1] "Tue Jan 28 16:52:36 2025"
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tidyr_1.3.0 ggplot2_3.4.2
## [3] dmrseq_1.18.1 bsseq_1.34.0
## [5] SummarizedExperiment_1.28.0 Biobase_2.58.0
## [7] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [9] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
## [11] IRanges_2.32.0 S4Vectors_0.36.2
## [13] BiocGenerics_0.44.0 dplyr_1.1.2
## [15] plyr_1.8.8
##
## loaded via a namespace (and not attached):
## [1] AnnotationHub_3.6.0 BiocFileCache_2.6.1
## [3] splines_4.2.1 BiocParallel_1.32.6
## [5] digest_0.6.33 foreach_1.5.2
## [7] htmltools_0.5.5 fansi_1.0.4
## [9] magrittr_2.0.3 memoise_2.0.1
## [11] BSgenome_1.66.3 tzdb_0.4.0
## [13] limma_3.54.2 Biostrings_2.66.0
## [15] readr_2.1.4 R.utils_2.12.2
## [17] prettyunits_1.1.1 colorspace_2.1-0
## [19] blob_1.2.4 rappdirs_0.3.3
## [21] xfun_0.39 crayon_1.5.2
## [23] RCurl_1.98-1.12 jsonlite_1.8.7
## [25] annotatr_1.24.0 iterators_1.0.14
## [27] glue_1.6.2 gtable_0.3.3
## [29] zlibbioc_1.44.0 XVector_0.38.0
## [31] DelayedArray_0.24.0 Rhdf5lib_1.20.0
## [33] HDF5Array_1.26.0 scales_1.2.1
## [35] DBI_1.1.3 rngtools_1.5.2
## [37] Rcpp_1.0.11 xtable_1.8-4
## [39] progress_1.2.2 bumphunter_1.40.0
## [41] bit_4.0.5 httr_1.4.6
## [43] RColorBrewer_1.1-3 ellipsis_0.3.2
## [45] pkgconfig_2.0.3 XML_3.99-0.14
## [47] R.methodsS3_1.8.2 farver_2.1.1
## [49] sass_0.4.7 dbplyr_2.3.3
## [51] locfit_1.5-9.7 utf8_1.2.3
## [53] tidyselect_1.2.0 labeling_0.4.2
## [55] rlang_1.1.1 reshape2_1.4.4
## [57] later_1.3.1 AnnotationDbi_1.60.2
## [59] munsell_0.5.0 BiocVersion_3.16.0
## [61] tools_4.2.1 cachem_1.0.8
## [63] cli_3.6.1 generics_0.1.3
## [65] RSQLite_2.3.1 evaluate_0.21
## [67] stringr_1.5.0 fastmap_1.1.1
## [69] yaml_2.3.7 outliers_0.15
## [71] knitr_1.43 bit64_4.0.5
## [73] purrr_1.0.1 KEGGREST_1.38.0
## [75] nlme_3.1-162 doRNG_1.8.6
## [77] sparseMatrixStats_1.10.0 mime_0.12
## [79] R.oo_1.25.0 xml2_1.3.5
## [81] biomaRt_2.54.1 compiler_4.2.1
## [83] rstudioapi_0.15.0 filelock_1.0.2
## [85] curl_5.0.1 png_0.1-8
## [87] interactiveDisplayBase_1.36.0 tibble_3.2.1
## [89] bslib_0.5.0 stringi_1.7.12
## [91] highr_0.10 GenomicFeatures_1.50.4
## [93] lattice_0.21-8 Matrix_1.6-0
## [95] permute_0.9-7 vctrs_0.6.3
## [97] pillar_1.9.0 lifecycle_1.0.3
## [99] rhdf5filters_1.10.1 BiocManager_1.30.20
## [101] jquerylib_0.1.4 data.table_1.14.8
## [103] bitops_1.0-7 httpuv_1.6.11
## [105] rtracklayer_1.58.0 R6_2.5.1
## [107] BiocIO_1.8.0 promises_1.2.0.1
## [109] codetools_0.2-19 gtools_3.9.4
## [111] rhdf5_2.42.1 rjson_0.2.21
## [113] withr_2.5.0 regioneR_1.30.0
## [115] GenomicAlignments_1.34.1 Rsamtools_2.14.0
## [117] GenomeInfoDbData_1.2.9 parallel_4.2.1
## [119] hms_1.1.3 grid_4.2.1
## [121] rmarkdown_2.23 DelayedMatrixStats_1.20.0
## [123] shiny_1.7.4.1 restfulr_0.0.15