1. Environment setting

library(DSS)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: BiocParallel
## Loading required package: bsseq
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: parallel
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(scales)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:bsseq':
## 
##     combine
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(ChIPseeker)
## 
## ChIPseeker v1.34.1  For help: https://guangchuangyu.github.io/software/ChIPseeker
## 
## If you use ChIPseeker in published research, please cite:
## Qianwen Wang, Ming Li, Tianzhi Wu, Li Zhan, Lin Li, Meijun Chen, Wenqin Xie, Zijing Xie, Erqiang Hu, Shuangbin Xu, Guangchuang Yu. Exploring epigenomic datasets by ChIPseeker. Current Protocols 2022, 2(10): e585
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:plotly':
## 
##     select
## 
library(ReactomePA)
## ReactomePA v1.42.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479
library(clusterProfiler)
## clusterProfiler v4.6.2  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
library(dmrseq)
library(annotatr)
library(DT)
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     shift
## The following object is masked from 'package:GenomicRanges':
## 
##     shift
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second

GTFFile <- "~/DataDir/3.TwistBedAnn/Input/gencode.v35.annotation.gtf.gz"
txdb_v35 <- GenomicFeatures::makeTxDbFromGFF(GTFFile, format="gtf")
## Import genomic features from the file as a GRanges object ...
## OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...
## Warning in .get_cds_IDX(mcols0$type, mcols0$phase): The "phase" metadata column contains non-NA values for features of type
##   stop_codon. This information was ignored.
## OK
InputFolder <- params$InputFolder
OutputFolder <- params$OutputFolder

2. Loading data

2.1 BSSeq objects

Loading of bsseq object, with CpGs shared by 75% of samples (after sample selection).

bsseq_obj <- readRDS("~/DataDir/2.DifferentialAnalysis/Input/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed   

2.2 DMRs and DMLs

The DMRs here selected are called like this: delta=0.25 and p.threshold=0.0001

DMRs <- readRDS(paste0(InputFolder, '/', 'DMRs.rds'))
#DMLs <- readRDS(paste0(InputFolder, '/', 'DMLs.rds'))

2.3 Universe of genes

Importing Twist regions that were annotated

TwistAnnotated <- readRDS(params$TwistAnnotated)
TwistAnnotated %>% head()
##   seqnames start   end width strand
## 1     chr1 10466 10585   120      *
## 2     chr1 10790 10909   120      *
## 3     chr1 15806 15925   120      *
## 4     chr1 18768 18887   120      *
## 5     chr1 29357 29476   120      *
## 6     chr1 36544 36663   120      *
##                                                               ann
## 1                                            cg14817997,cpg_inter
## 2                                 cg16269199,cg26928153,cpg_inter
## 3 CTCF_binding_site,cg13869341,cpg_inter,promoter_flanking_region
## 4                                            cg14008030,cpg_inter
## 5                      cg12045430,cg20826792,cpg_islands,promoter
## 6                   cg18231760,cpg_inter,promoter_flanking_region
##         annotation geneChr geneStart geneEnd geneLength geneStrand
## 1 Promoter (1-2kb)       1     11869   14409       2541          1
## 2 Promoter (<=1kb)       1     11869   14409       2541          1
## 3 Promoter (1-2kb)       1     17369   17436         68          2
## 4 Promoter (1-2kb)       1     17369   17436         68          2
## 5 Promoter (<=1kb)       1     29554   31097       1544          1
## 6 Promoter (<=1kb)       1     34554   36081       1528          2
##   ensembl_gene_id_version      transcriptId distanceToTSS
## 1       ENSG00000223972.5 ENST00000456328.2         -1284
## 2       ENSG00000223972.5 ENST00000456328.2          -960
## 3       ENSG00000278267.1 ENST00000619216.1          1511
## 4       ENSG00000278267.1 ENST00000619216.1         -1332
## 5       ENSG00000243485.5 ENST00000473358.1           -78
## 6       ENSG00000237613.2 ENST00000417324.1          -463
##                                                               flank_txIds
## 1                   ENST00000456328.2;ENST00000450305.2;ENST00000488147.1
## 2                   ENST00000456328.2;ENST00000450305.2;ENST00000488147.1
## 3 ENST00000456328.2;ENST00000450305.2;ENST00000488147.1;ENST00000619216.1
## 4                   ENST00000456328.2;ENST00000488147.1;ENST00000619216.1
## 5 ENST00000488147.1;ENST00000473358.1;ENST00000469289.1;ENST00000607096.1
## 6                                     ENST00000417324.1;ENST00000461467.1
##                                                             flank_geneIds
## 1                   ENSG00000223972.5;ENSG00000223972.5;ENSG00000227232.5
## 2                   ENSG00000223972.5;ENSG00000223972.5;ENSG00000227232.5
## 3 ENSG00000223972.5;ENSG00000223972.5;ENSG00000227232.5;ENSG00000278267.1
## 4                   ENSG00000223972.5;ENSG00000227232.5;ENSG00000278267.1
## 5 ENSG00000227232.5;ENSG00000243485.5;ENSG00000243485.5;ENSG00000284332.1
## 6                                     ENSG00000237613.2;ENSG00000237613.2
##   flank_gene_distances ensembl_gene_id hgnc_symbol external_gene_name
## 1        -1284;-1425;0 ENSG00000223972     DDX11L1            DDX11L1
## 2         -960;-1101;0 ENSG00000223972     DDX11L1            DDX11L1
## 3     3937;3796;0;1511 ENSG00000278267   MIR6859-1          MIR6859-1
## 4         6899;0;-1332 ENSG00000278267   MIR6859-1          MIR6859-1
## 5      0;-78;-791;-890 ENSG00000243485 MIR1302-2HG        MIR1302-2HG
## 6            -463;-471 ENSG00000237613     FAM138A            FAM138A
##                         gene_biotype
## 1 transcribed_unprocessed_pseudogene
## 2 transcribed_unprocessed_pseudogene
## 3                              miRNA
## 4                              miRNA
## 5                             lncRNA
## 6                             lncRNA
##                                                                        description
## 1   DEAD/H-box helicase 11 like 1 (pseudogene) [Source:HGNC Symbol;Acc:HGNC:37102]
## 2   DEAD/H-box helicase 11 like 1 (pseudogene) [Source:HGNC Symbol;Acc:HGNC:37102]
## 3                              microRNA 6859-1 [Source:HGNC Symbol;Acc:HGNC:50039]
## 4                              microRNA 6859-1 [Source:HGNC Symbol;Acc:HGNC:50039]
## 5                          MIR1302-2 host gene [Source:HGNC Symbol;Acc:HGNC:52482]
## 6 family with sequence similarity 138 member A [Source:HGNC Symbol;Acc:HGNC:32334]
##   chromosome_name start_position end_position
## 1               1          11869        14409
## 2               1          11869        14409
## 3               1          17369        17436
## 4               1          17369        17436
## 5               1          29554        31109
## 6               1          34554        36081
Genes_df <- TwistAnnotated[, c("ensembl_gene_id_version", "ensembl_gene_id", "hgnc_symbol", "gene_biotype")] %>% dplyr::distinct()%>%filter(!is.na(hgnc_symbol))%>%filter(!hgnc_symbol%in%"")
rownames(Genes_df) <- NULL

These are the genes with the same symbol but different ensembl id

Genes_df[Genes_df$hgnc_symbol%in%Genes_df[duplicated(Genes_df$hgnc_symbol), "hgnc_symbol"],]
##       ensembl_gene_id_version ensembl_gene_id hgnc_symbol
## 3309        ENSG00000285053.1 ENSG00000285053        TBCE
## 3310        ENSG00000284770.2 ENSG00000284770        TBCE
## 20449       ENSG00000237940.3 ENSG00000237940   LINC01238
## 20451       ENSG00000261186.2 ENSG00000261186   LINC01238
## 21924      ENSG00000206195.11 ENSG00000206195      DUXAP8
## 21926       ENSG00000271672.1 ENSG00000271672      DUXAP8
## 24581       ENSG00000284862.3 ENSG00000284862      CCDC39
## 24582      ENSG00000145075.13 ENSG00000145075      CCDC39
## 30308       ENSG00000272655.2 ENSG00000272655     POLR2J4
## 30309       ENSG00000214783.9 ENSG00000214783     POLR2J4
##                             gene_biotype
## 3309                      protein_coding
## 3310                      protein_coding
## 20449                             lncRNA
## 20451                             lncRNA
## 21924                             lncRNA
## 21926   transcribed_processed_pseudogene
## 24581                     protein_coding
## 24582                             lncRNA
## 30308 transcribed_unprocessed_pseudogene
## 30309                             lncRNA
Genes_df$entrez_gene_id <- as.vector(mapIds(org.Hs.eg.db, Genes_df$hgnc_symbol, "ENTREZID","SYMBOL"))
## 'select()' returned 1:many mapping between keys and columns
nrow(Genes_df[is.na(Genes_df$entrez_gene_id),]) #number of genes without an entrez id
## [1] 421
GeneUniverse_symbol <- unique(Genes_df$hgnc_symbol)
GeneUniverse_entrezid <- na.omit(unique(Genes_df$entrez_gene_id))

3. Transform DataFrame with DMRs info to GRanges object

Remember that position form bedGraph files are 0-based while R works with 1-based position and therefore also the packages we will go to use, this is the reason why I set starts.in.df.are.0based = TRUE in creating the GRanges objects, it converts 0-based to 1-based coordinates.

DMRs_GRanges <- DMRs
#DMLs_GRanges <- DMLs

for(i in 1:length(DMRs)){
  DMRs_GRanges[[i]] <- makeGRangesFromDataFrame(DMRs_GRanges[[i]], keep.extra.columns = TRUE, 
                                                start.field = "start", end.field = "end", starts.in.df.are.0based = TRUE)
#DMLs_GRanges[[i]] <- makeGRangesFromDataFrame(DMLs_GRanges[[i]], keep.extra.columns = TRUE, 
  #start.field = "start", end.field = "end", starts.in.df.are.0based = TRUE)
  } 
  • For hiPSCs vs hEGCLCs we have 732 DMRs.
  • For hPGCLCs vs hEGCLCs we have 2687 DMRs.
  • For hPGCLCs vs hiPSCs we have 2415 DMRs.
  • For iMeLCs vs hiPSCs we have 66 DMRs.
  • For hPGCLCs vs iMeLCs we have 1416 DMRs.
  • For iMeLCs vs hEGCLCs we have 812 DMRs.

4. DMRs annotation

4.1 hiPSCs vs hEGCLCs

4.1.1 Genetic annotation and visualization with ChIPseeker

DMRs are now annotated to their associated genes and genomic regions using the annotatePeaks function of the ChIPseekerR package, using a TxDb object. The GenomicFeatures package uses TxDb objects to store transcript metadata. This class maps the 5’ and 3’ untranslated regions (UTRs), protein coding sequences (CDSs) and exons for a set of mRNA transcripts to their associated genome. All TxDb objects are backed by a SQLite database that manages genomic locations and the relationships between pre-processed mRNA transcripts, exons, protein coding sequences, and their related gene identifiers.

TxDb created by us

peakAnnoList <- lapply(list("Higher methylation in hiPSCs" = DMRs_GRanges$hEGCLCsvshiPSCs[DMRs_GRanges$hEGCLCsvshiPSCs$diff.Methy<0,], "Higher methylation in hEGCLCs" = DMRs_GRanges$hEGCLCsvshiPSCs[DMRs_GRanges$hEGCLCsvshiPSCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList)

ChIPseeker::plotDistToTSS(peakAnnoList)

Given a list of gene set, compareCluster function will compute profiles of each gene cluster.

genes = lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hiPSCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in hEGCLCs`))
## [1] FALSE

There are no duplicated ensembl ids

table(genes$`Higher methylation in hEGCLCs` %in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##     4    11
genes$`Higher methylation in hEGCLCs` [!genes$`Higher methylation in hEGCLCs` %in% Genes_df$ensembl_gene_id_version]
## [1] "ENSG00000287690.1"  "ENSG00000286525.1"  "ENSG00000231078.1" 
## [4] "ENSG00000205622.11"
table(genes$`Higher methylation in hiPSCs`%in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##   155   530
DMG_uphiPSCsvshEGCLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hiPSCs`, "entrez_gene_id"]
DMG_uphiPSCsvshEGCLCs <- na.omit(DMG_uphiPSCsvshEGCLCs[!DMG_uphiPSCsvshEGCLCs %in% ""])
DMG_downhiPSCsvshEGCLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hEGCLCs`, "entrez_gene_id"]
DMG_downhiPSCsvshEGCLCs <- na.omit(DMG_downhiPSCsvshEGCLCs[!DMG_downhiPSCsvshEGCLCs %in% ""])
any(duplicated(DMG_uphiPSCsvshEGCLCs))
## [1] FALSE
any(duplicated(DMG_downhiPSCsvshEGCLCs))
## [1] FALSE
genes$`Higher methylation in hiPSCs` <- DMG_uphiPSCsvshEGCLCs
genes$`Higher methylation in hEGCLCs` <- DMG_downhiPSCsvshEGCLCs
compKEGG <- compareCluster(geneCluster   = genes,
                         fun           = "enrichKEGG",
                         organism="hsa",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         universe = GeneUniverse_entrezid)
## Warning in compareCluster(geneCluster = genes, fun = "enrichKEGG", organism =
## "hsa", : No enrichment found in any of gene cluster, please check your input...

There are no significant terms (They have Padj>0.05)

#dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
compGO <- compareCluster(geneCluster   = genes,
                         fun           = "enrichGO",
                         keyType = "ENTREZID",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         OrgDb='org.Hs.eg.db', 
                         universe=GeneUniverse_entrezid,
                         ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")

A new DMRs_annotated object is created in which annotation from ChIPseeker is added.

DMRs_annotated <- DMRs_GRanges[-2]

mcols(DMRs_annotated$hEGCLCsvshiPSCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hEGCLCsvshiPSCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hEGCLCsvshiPSCs)$ChipSeekerGeneId <- NA

m <- findOverlaps(DMRs_annotated$hEGCLCsvshiPSCs, peakAnnoList[["Higher methylation in hiPSCs"]]@anno)
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "geneId"]

m <- findOverlaps(DMRs_annotated$hEGCLCsvshiPSCs, peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "geneId"]

Adding entrez gene id, gene symbol and gene information

DMRs_annotated$hEGCLCsvshiPSCs <- as.data.frame(DMRs_annotated$hEGCLCsvshiPSCs)
DMRs_annotated$hEGCLCsvshiPSCs <- dplyr::left_join(DMRs_annotated$hEGCLCsvshiPSCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) 
DMRs_annotated$hEGCLCsvshiPSCs[!is.na(DMRs_annotated$hEGCLCsvshiPSCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
    datatable(class = 'hover', rownames = FALSE, caption="DMRs - hEGCLCsvshiPSCs", extension='Buttons', escape = FALSE, 
              options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE, 
                             buttons=list(c('csv', 'excel'))))

4.2 hPGCLCs vs hEGCLCs

4.2.1 Genetic annotation and visualization with ChIPseeker

DMRs are now annotated to their associated genes and genomic regions using the annotatePeaks function of the ChIPseekerR package, using a TxDb object. The GenomicFeatures package uses TxDb objects to store transcript metadata. This class maps the 5’ and 3’ untranslated regions (UTRs), protein coding sequences (CDSs) and exons for a set of mRNA transcripts to their associated genome. All TxDb objects are backed by a SQLite database that manages genomic locations and the relationships between pre-processed mRNA transcripts, exons, protein coding sequences, and their related gene identifiers.

TxDb created by us

peakAnnoList <- lapply(list("Higher methylation in hPGCLCs" = DMRs_GRanges$hEGCLCsvshPGCLCs[DMRs_GRanges$hEGCLCsvshPGCLCs$diff.Methy<0,], "Higher methylation in hEGCLCs" = DMRs_GRanges$hEGCLCsvshPGCLCs[DMRs_GRanges$hEGCLCsvshPGCLCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList)

ChIPseeker::plotDistToTSS(peakAnnoList)

Given a list of gene set, compareCluster function will compute profiles of each gene cluster.

genes = lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hPGCLCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in hEGCLCs`))
## [1] FALSE

There are no duplicated ensembl ids

table(genes$`Higher methylation in hEGCLCs` %in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##   389  1310
genes$`Higher methylation in hEGCLCs` [!genes$`Higher methylation in hEGCLCs` %in% Genes_df$ensembl_gene_id_version]
##   [1] "ENSG00000272849.1"  "ENSG00000224079.1"  "ENSG00000225339.4" 
##   [4] "ENSG00000275178.1"  "ENSG00000288549.1"  "ENSG00000275088.1" 
##   [7] "ENSG00000286295.1"  "ENSG00000279135.1"  "ENSG00000280326.1" 
##  [10] "ENSG00000267665.2"  "ENSG00000237326.1"  "ENSG00000287403.1" 
##  [13] "ENSG00000233052.1"  "ENSG00000286535.1"  "ENSG00000284691.1" 
##  [16] "ENSG00000263017.1"  "ENSG00000285855.1"  "ENSG00000285163.1" 
##  [19] "ENSG00000287428.1"  "ENSG00000230676.1"  "ENSG00000234275.4" 
##  [22] "ENSG00000287690.1"  "ENSG00000259362.2"  "ENSG00000287407.1" 
##  [25] "ENSG00000267244.5"  "ENSG00000279412.1"  "ENSG00000237126.8" 
##  [28] "ENSG00000278716.1"  "ENSG00000238010.1"  "ENSG00000230526.1" 
##  [31] "ENSG00000274373.1"  "ENSG00000276166.1"  "ENSG00000255342.1" 
##  [34] "ENSG00000249808.3"  "ENSG00000238031.1"  "ENSG00000287787.1" 
##  [37] "ENSG00000253397.1"  "ENSG00000241449.6"  "ENSG00000279065.1" 
##  [40] "ENSG00000256564.1"  "ENSG00000227479.1"  "ENSG00000270490.1" 
##  [43] "ENSG00000282718.1"  "ENSG00000233891.8"  "ENSG00000215381.3" 
##  [46] "ENSG00000272988.1"  "ENSG00000199949.2"  "ENSG00000231113.2" 
##  [49] "ENSG00000237371.1"  "ENSG00000253543.1"  "ENSG00000257883.1" 
##  [52] "ENSG00000270911.1"  "ENSG00000238232.1"  "ENSG00000254607.2" 
##  [55] "ENSG00000287823.1"  "ENSG00000262884.1"  "ENSG00000258698.1" 
##  [58] "ENSG00000270972.1"  "ENSG00000286240.1"  "ENSG00000284966.3" 
##  [61] "ENSG00000235811.1"  "ENSG00000279931.1"  "ENSG00000233683.1" 
##  [64] "ENSG00000237720.2"  "ENSG00000245688.1"  "ENSG00000234793.1" 
##  [67] "ENSG00000280444.1"  "ENSG00000231482.3"  "ENSG00000267090.1" 
##  [70] "ENSG00000228154.3"  "ENSG00000286525.1"  "ENSG00000266933.2" 
##  [73] "ENSG00000287046.1"  "ENSG00000282952.1"  "ENSG00000260855.1" 
##  [76] "ENSG00000216775.3"  "ENSG00000279711.1"  "ENSG00000230990.2" 
##  [79] "ENSG00000261054.1"  "ENSG00000279705.1"  "ENSG00000286286.1" 
##  [82] "ENSG00000262652.1"  "ENSG00000287073.1"  "ENSG00000268750.7" 
##  [85] "ENSG00000261938.1"  "ENSG00000285773.1"  "ENSG00000255845.1" 
##  [88] "ENSG00000250577.1"  "ENSG00000287161.1"  "ENSG00000261189.1" 
##  [91] "ENSG00000205325.3"  "ENSG00000231170.7"  "ENSG00000254755.1" 
##  [94] "ENSG00000254933.1"  "ENSG00000286502.1"  "ENSG00000226047.1" 
##  [97] "ENSG00000225092.2"  "ENSG00000261606.5"  "ENSG00000269667.2" 
## [100] "ENSG00000261976.2"  "ENSG00000234428.2"  "ENSG00000287906.1" 
## [103] "ENSG00000259341.2"  "ENSG00000279076.1"  "ENSG00000287338.1" 
## [106] "ENSG00000269976.1"  "ENSG00000231078.1"  "ENSG00000232408.1" 
## [109] "ENSG00000235994.4"  "ENSG00000226169.1"  "ENSG00000278330.1" 
## [112] "ENSG00000261177.1"  "ENSG00000253140.2"  "ENSG00000219133.2" 
## [115] "ENSG00000256955.2"  "ENSG00000237372.4"  "ENSG00000254111.3" 
## [118] "ENSG00000250409.1"  "ENSG00000271709.1"  "ENSG00000275155.1" 
## [121] "ENSG00000226868.1"  "ENSG00000229628.1"  "ENSG00000234949.2" 
## [124] "ENSG00000250230.3"  "ENSG00000234378.1"  "ENSG00000261211.1" 
## [127] "ENSG00000224545.1"  "ENSG00000203647.2"  "ENSG00000260473.2" 
## [130] "ENSG00000249236.2"  "ENSG00000286805.1"  "ENSG00000275741.1" 
## [133] "ENSG00000242009.2"  "ENSG00000261534.1"  "ENSG00000279841.1" 
## [136] "ENSG00000231876.8"  "ENSG00000256481.1"  "ENSG00000253445.1" 
## [139] "ENSG00000287680.1"  "ENSG00000225156.2"  "ENSG00000287235.1" 
## [142] "ENSG00000205622.11" "ENSG00000287089.1"  "ENSG00000201196.1" 
## [145] "ENSG00000231539.1"  "ENSG00000279261.2"  "ENSG00000225028.1" 
## [148] "ENSG00000236676.1"  "ENSG00000250338.1"  "ENSG00000260185.1" 
## [151] "ENSG00000224195.1"  "ENSG00000225140.1"  "ENSG00000253307.1" 
## [154] "ENSG00000254364.1"  "ENSG00000259252.1"  "ENSG00000227527.2" 
## [157] "ENSG00000270531.1"  "ENSG00000228361.2"  "ENSG00000287756.1" 
## [160] "ENSG00000231073.1"  "ENSG00000288040.1"  "ENSG00000286198.1" 
## [163] "ENSG00000267658.1"  "ENSG00000201549.1"  "ENSG00000254205.1" 
## [166] "ENSG00000287828.1"  "ENSG00000259920.1"  "ENSG00000243491.1" 
## [169] "ENSG00000280110.1"  "ENSG00000258560.1"  "ENSG00000254777.5" 
## [172] "ENSG00000285707.1"  "ENSG00000230163.1"  "ENSG00000280354.1" 
## [175] "ENSG00000248359.2"  "ENSG00000234640.1"  "ENSG00000225187.1" 
## [178] "ENSG00000256314.1"  "ENSG00000279430.1"  "ENSG00000235563.1" 
## [181] "ENSG00000225632.1"  "ENSG00000264914.1"  "ENSG00000262869.1" 
## [184] "ENSG00000283782.2"  "ENSG00000235538.3"  "ENSG00000242531.1" 
## [187] "ENSG00000279526.1"  "ENSG00000261555.1"  "ENSG00000254789.3" 
## [190] "ENSG00000201042.1"  "ENSG00000246790.2"  "ENSG00000253688.2" 
## [193] "ENSG00000264093.1"  "ENSG00000232409.1"  "ENSG00000287387.1" 
## [196] "ENSG00000250007.7"  "ENSG00000248645.1"  "ENSG00000286320.1" 
## [199] "ENSG00000273160.1"  "ENSG00000258935.1"  "ENSG00000234174.1" 
## [202] "ENSG00000196696.13" "ENSG00000254559.1"  "ENSG00000287822.1" 
## [205] "ENSG00000251270.1"  "ENSG00000237617.1"  "ENSG00000253125.1" 
## [208] "ENSG00000262445.3"  "ENSG00000259420.5"  "ENSG00000226087.2" 
## [211] "ENSG00000229673.1"  "ENSG00000236936.1"  "ENSG00000225315.2" 
## [214] "ENSG00000286641.1"  "ENSG00000212589.1"  "ENSG00000279982.1" 
## [217] "ENSG00000226706.1"  "ENSG00000280270.1"  "ENSG00000286356.1" 
## [220] "ENSG00000217514.1"  "ENSG00000272097.2"  "ENSG00000250576.1" 
## [223] "ENSG00000259579.1"  "ENSG00000279409.1"  "ENSG00000232716.2" 
## [226] "ENSG00000261702.2"  "ENSG00000277825.1"  "ENSG00000260004.1" 
## [229] "ENSG00000267245.1"  "ENSG00000235023.1"  "ENSG00000254290.1" 
## [232] "ENSG00000286610.1"  "ENSG00000226927.1"  "ENSG00000286635.1" 
## [235] "ENSG00000248521.1"  "ENSG00000288079.1"  "ENSG00000260640.1" 
## [238] "ENSG00000277319.1"  "ENSG00000224731.1"  "ENSG00000260454.2" 
## [241] "ENSG00000286505.1"  "ENSG00000261095.1"  "ENSG00000286364.1" 
## [244] "ENSG00000262966.2"  "ENSG00000261472.1"  "ENSG00000228683.1" 
## [247] "ENSG00000276476.3"  "ENSG00000274379.1"  "ENSG00000235096.2" 
## [250] "ENSG00000226398.1"  "ENSG00000201541.1"  "ENSG00000288639.1" 
## [253] "ENSG00000280392.1"  "ENSG00000249727.2"  "ENSG00000226965.2" 
## [256] "ENSG00000271114.1"  "ENSG00000251445.1"  "ENSG00000229305.1" 
## [259] "ENSG00000201584.1"  "ENSG00000203392.3"  "ENSG00000280004.1" 
## [262] "ENSG00000287975.1"  "ENSG00000283128.2"  "ENSG00000228613.1" 
## [265] "ENSG00000258346.1"  "ENSG00000248962.1"  "ENSG00000224794.2" 
## [268] "ENSG00000287903.1"  "ENSG00000261723.1"  "ENSG00000285367.1" 
## [271] "ENSG00000244260.2"  "ENSG00000287560.1"  "ENSG00000233096.1" 
## [274] "ENSG00000231251.1"  "ENSG00000286390.1"  "ENSG00000256452.1" 
## [277] "ENSG00000272922.1"  "ENSG00000263508.6"  "ENSG00000280217.1" 
## [280] "ENSG00000226963.1"  "ENSG00000280138.1"  "ENSG00000249930.1" 
## [283] "ENSG00000202318.1"  "ENSG00000262810.1"  "ENSG00000261710.1" 
## [286] "ENSG00000283511.1"  "ENSG00000237422.2"  "ENSG00000268080.2" 
## [289] "ENSG00000235886.1"  "ENSG00000288615.1"  "ENSG00000233633.2" 
## [292] "ENSG00000229618.3"  "ENSG00000272215.1"  "ENSG00000280222.1" 
## [295] "ENSG00000263723.1"  "ENSG00000259202.1"  "ENSG00000220739.2" 
## [298] "ENSG00000272715.1"  "ENSG00000287145.1"  "ENSG00000229660.1" 
## [301] "ENSG00000285534.1"  "ENSG00000228345.2"  "ENSG00000279778.1" 
## [304] "ENSG00000227920.3"  "ENSG00000253051.1"  "ENSG00000263499.1" 
## [307] "ENSG00000287726.1"  "ENSG00000277831.1"  "ENSG00000280047.1" 
## [310] "ENSG00000226096.1"  "ENSG00000221461.2"  "ENSG00000258760.1" 
## [313] "ENSG00000258088.1"  "ENSG00000259200.2"  "ENSG00000248702.1" 
## [316] "ENSG00000206814.1"  "ENSG00000284652.1"  "ENSG00000260788.6" 
## [319] "ENSG00000286757.2"  "ENSG00000255446.1"  "ENSG00000258399.7" 
## [322] "ENSG00000186244.7"  "ENSG00000212580.1"  "ENSG00000255021.1" 
## [325] "ENSG00000176515.1"  "ENSG00000233981.1"  "ENSG00000237920.2" 
## [328] "ENSG00000270927.1"  "ENSG00000260304.1"  "ENSG00000287882.1" 
## [331] "ENSG00000232072.1"  "ENSG00000244662.1"  "ENSG00000285775.1" 
## [334] "ENSG00000273497.1"  "ENSG00000227775.3"  "ENSG00000234017.1" 
## [337] "ENSG00000259989.1"  "ENSG00000229607.1"  "ENSG00000267579.1" 
## [340] "ENSG00000254186.3"  "ENSG00000285623.1"  "ENSG00000254458.1" 
## [343] "ENSG00000203739.4"  "ENSG00000271384.1"  "ENSG00000286276.1" 
## [346] "ENSG00000266101.1"  "ENSG00000271795.1"  "ENSG00000199196.1" 
## [349] "ENSG00000277938.1"  "ENSG00000207210.1"  "ENSG00000268926.3" 
## [352] "ENSG00000286101.1"  "ENSG00000270174.1"  "ENSG00000255605.1" 
## [355] "ENSG00000257726.1"  "ENSG00000254538.1"  "ENSG00000278999.1" 
## [358] "ENSG00000230404.1"  "ENSG00000212590.1"  "ENSG00000242757.1" 
## [361] "ENSG00000256708.1"  "ENSG00000248733.1"  "ENSG00000271269.1" 
## [364] "ENSG00000235152.1"  "ENSG00000286683.1"  "ENSG00000236466.2" 
## [367] "ENSG00000199751.1"  "ENSG00000241354.2"  "ENSG00000224593.1" 
## [370] "ENSG00000223027.1"  "ENSG00000199200.2"  "ENSG00000276332.1" 
## [373] "ENSG00000277050.1"  "ENSG00000255545.8"  "ENSG00000197099.8" 
## [376] "ENSG00000270120.1"  "ENSG00000237844.2"  "ENSG00000203446.2" 
## [379] "ENSG00000287010.1"  "ENSG00000243116.1"  "ENSG00000228176.1" 
## [382] "ENSG00000201393.1"  "ENSG00000286176.2"  "ENSG00000231057.4" 
## [385] "ENSG00000283579.1"  "ENSG00000259771.1"  "ENSG00000223727.6" 
## [388] "ENSG00000258111.1"  "ENSG00000271475.1"
table(genes$`Higher methylation in hPGCLCs`%in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##   162   591
DMG_uphPGCLCsvshEGCLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hPGCLCs`, "entrez_gene_id"]
DMG_uphPGCLCsvshEGCLCs <- na.omit(DMG_uphPGCLCsvshEGCLCs[!DMG_uphPGCLCsvshEGCLCs %in% ""])
DMG_downhPGCLCsvshEGCLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hEGCLCs`, "entrez_gene_id"]
DMG_downhPGCLCsvshEGCLCs <- na.omit(DMG_downhPGCLCsvshEGCLCs[!DMG_downhPGCLCsvshEGCLCs %in% ""])
any(duplicated(DMG_uphPGCLCsvshEGCLCs))
## [1] FALSE
any(duplicated(DMG_downhPGCLCsvshEGCLCs))
## [1] FALSE
genes$`Higher methylation in hPGCLCs` <- DMG_uphPGCLCsvshEGCLCs
genes$`Higher methylation in hEGCLCs` <- DMG_downhPGCLCsvshEGCLCs
compKEGG <- compareCluster(geneCluster   = genes,
                         fun           = "enrichKEGG",
                         organism="hsa",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         universe = GeneUniverse_entrezid)

There are no significant terms (They have Padj>0.05)

dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")

compGO <- compareCluster(geneCluster   = genes,
                         fun           = "enrichGO",
                         keyType = "ENTREZID",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         OrgDb='org.Hs.eg.db', 
                         universe=GeneUniverse_entrezid,
                         ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")

A new DMRs_annotated object is created in which annotation from ChIPseeker is added.

mcols(DMRs_annotated$hEGCLCsvshPGCLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)$ChipSeekerGeneId <- NA

m <- findOverlaps(DMRs_annotated$hEGCLCsvshPGCLCs, peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "geneId"]

m <- findOverlaps(DMRs_annotated$hEGCLCsvshPGCLCs, peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "geneId"]

Adding entrez gene id, gene symbol and gene information

DMRs_annotated$hEGCLCsvshPGCLCs <- as.data.frame(DMRs_annotated$hEGCLCsvshPGCLCs)
DMRs_annotated$hEGCLCsvshPGCLCs <- dplyr::left_join(DMRs_annotated$hEGCLCsvshPGCLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) 
DMRs_annotated$hEGCLCsvshPGCLCs[!is.na(DMRs_annotated$hEGCLCsvshPGCLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
    datatable(class = 'hover', rownames = FALSE, caption="DMRs - hEGCLCsvshPGCLCs", extension='Buttons', escape = FALSE, 
              options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE, 
                             buttons=list(c('csv', 'excel'))))

4.3 hPGCLCs vs hiPSCs

4.3.1 Genetic annotation and visualization with ChIPseeker

DMRs are now annotated to their associated genes and genomic regions using the annotatePeaks function of the ChIPseekerR package, using a TxDb object. The GenomicFeatures package uses TxDb objects to store transcript metadata. This class maps the 5’ and 3’ untranslated regions (UTRs), protein coding sequences (CDSs) and exons for a set of mRNA transcripts to their associated genome. All TxDb objects are backed by a SQLite database that manages genomic locations and the relationships between pre-processed mRNA transcripts, exons, protein coding sequences, and their related gene identifiers.

TxDb created by us

peakAnnoList <- lapply(list("Higher methylation in hPGCLCs" = DMRs_GRanges$hiPSCsvshPGCLCs[DMRs_GRanges$hiPSCsvshPGCLCs$diff.Methy<0,], "Higher methylation in hiPSCs" = DMRs_GRanges$hiPSCsvshPGCLCs[DMRs_GRanges$hiPSCsvshPGCLCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList)

ChIPseeker::plotDistToTSS(peakAnnoList)

Given a list of gene set, compareCluster function will compute profiles of each gene cluster.

genes = lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hPGCLCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in hiPSCs`))
## [1] FALSE

There are no duplicated ensembl ids

table(genes$`Higher methylation in hiPSCs` %in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##   465  1600
genes$`Higher methylation in hiPSCs` [!genes$`Higher methylation in hiPSCs` %in% Genes_df$ensembl_gene_id_version]
##   [1] "ENSG00000272849.1"  "ENSG00000224079.1"  "ENSG00000225339.4" 
##   [4] "ENSG00000275178.1"  "ENSG00000288549.1"  "ENSG00000275088.1" 
##   [7] "ENSG00000278716.1"  "ENSG00000253395.1"  "ENSG00000267665.2" 
##  [10] "ENSG00000286295.1"  "ENSG00000233052.1"  "ENSG00000230676.1" 
##  [13] "ENSG00000280326.1"  "ENSG00000237326.1"  "ENSG00000263017.1" 
##  [16] "ENSG00000286535.1"  "ENSG00000267036.2"  "ENSG00000237371.1" 
##  [19] "ENSG00000285163.1"  "ENSG00000284691.1"  "ENSG00000285855.1" 
##  [22] "ENSG00000282718.1"  "ENSG00000279135.1"  "ENSG00000234275.4" 
##  [25] "ENSG00000287428.1"  "ENSG00000287407.1"  "ENSG00000259362.2" 
##  [28] "ENSG00000287823.1"  "ENSG00000253102.1"  "ENSG00000237126.8" 
##  [31] "ENSG00000254755.1"  "ENSG00000279412.1"  "ENSG00000276166.1" 
##  [34] "ENSG00000259341.2"  "ENSG00000238031.1"  "ENSG00000279065.1" 
##  [37] "ENSG00000272988.1"  "ENSG00000238010.1"  "ENSG00000270490.1" 
##  [40] "ENSG00000230526.1"  "ENSG00000241449.6"  "ENSG00000234793.1" 
##  [43] "ENSG00000268189.2"  "ENSG00000233891.8"  "ENSG00000205325.3" 
##  [46] "ENSG00000255845.1"  "ENSG00000249808.3"  "ENSG00000262652.1" 
##  [49] "ENSG00000199949.2"  "ENSG00000253397.1"  "ENSG00000231113.2" 
##  [52] "ENSG00000256564.1"  "ENSG00000258717.1"  "ENSG00000237372.4" 
##  [55] "ENSG00000176515.1"  "ENSG00000215381.3"  "ENSG00000270911.1" 
##  [58] "ENSG00000245688.1"  "ENSG00000279931.1"  "ENSG00000253543.1" 
##  [61] "ENSG00000203739.4"  "ENSG00000226409.3"  "ENSG00000286364.1" 
##  [64] "ENSG00000231482.3"  "ENSG00000228613.1"  "ENSG00000242757.1" 
##  [67] "ENSG00000262884.1"  "ENSG00000279711.1"  "ENSG00000258698.1" 
##  [70] "ENSG00000257883.1"  "ENSG00000279774.1"  "ENSG00000268750.7" 
##  [73] "ENSG00000261054.1"  "ENSG00000238232.1"  "ENSG00000286240.1" 
##  [76] "ENSG00000275741.1"  "ENSG00000284966.3"  "ENSG00000280444.1" 
##  [79] "ENSG00000287338.1"  "ENSG00000250577.1"  "ENSG00000267090.1" 
##  [82] "ENSG00000287161.1"  "ENSG00000287787.1"  "ENSG00000285773.1" 
##  [85] "ENSG00000228830.1"  "ENSG00000261976.2"  "ENSG00000235811.1" 
##  [88] "ENSG00000286286.1"  "ENSG00000216775.3"  "ENSG00000229660.1" 
##  [91] "ENSG00000256314.1"  "ENSG00000228154.3"  "ENSG00000226047.1" 
##  [94] "ENSG00000286610.1"  "ENSG00000261938.1"  "ENSG00000279705.1" 
##  [97] "ENSG00000286333.1"  "ENSG00000287046.1"  "ENSG00000201026.1" 
## [100] "ENSG00000230990.2"  "ENSG00000205037.2"  "ENSG00000253445.1" 
## [103] "ENSG00000234428.2"  "ENSG00000280004.1"  "ENSG00000269667.2" 
## [106] "ENSG00000269976.1"  "ENSG00000270124.1"  "ENSG00000249236.2" 
## [109] "ENSG00000259961.1"  "ENSG00000286805.1"  "ENSG00000248962.1" 
## [112] "ENSG00000287073.1"  "ENSG00000274373.1"  "ENSG00000235994.4" 
## [115] "ENSG00000254607.2"  "ENSG00000230163.1"  "ENSG00000282952.1" 
## [118] "ENSG00000284666.1"  "ENSG00000231539.1"  "ENSG00000267364.2" 
## [121] "ENSG00000278330.1"  "ENSG00000253307.1"  "ENSG00000231876.8" 
## [124] "ENSG00000226868.1"  "ENSG00000287632.1"  "ENSG00000242009.2" 
## [127] "ENSG00000231170.7"  "ENSG00000279076.1"  "ENSG00000206814.1" 
## [130] "ENSG00000282097.1"  "ENSG00000253140.2"  "ENSG00000229628.1" 
## [133] "ENSG00000226169.1"  "ENSG00000279668.3"  "ENSG00000250338.1" 
## [136] "ENSG00000250409.1"  "ENSG00000275155.1"  "ENSG00000279841.1" 
## [139] "ENSG00000232408.1"  "ENSG00000254027.1"  "ENSG00000256955.2" 
## [142] "ENSG00000225028.1"  "ENSG00000273160.1"  "ENSG00000287089.1" 
## [145] "ENSG00000254933.1"  "ENSG00000287235.1"  "ENSG00000250230.3" 
## [148] "ENSG00000260473.2"  "ENSG00000226542.1"  "ENSG00000234949.2" 
## [151] "ENSG00000224545.1"  "ENSG00000236676.1"  "ENSG00000256481.1" 
## [154] "ENSG00000254364.1"  "ENSG00000201196.1"  "ENSG00000196696.13"
## [157] "ENSG00000225140.1"  "ENSG00000279261.2"  "ENSG00000203647.2" 
## [160] "ENSG00000287811.1"  "ENSG00000217455.8"  "ENSG00000287756.1" 
## [163] "ENSG00000231073.1"  "ENSG00000225092.2"  "ENSG00000219133.2" 
## [166] "ENSG00000254111.3"  "ENSG00000259920.1"  "ENSG00000261606.5" 
## [169] "ENSG00000254777.5"  "ENSG00000287680.1"  "ENSG00000235538.3" 
## [172] "ENSG00000279430.1"  "ENSG00000286198.1"  "ENSG00000254205.1" 
## [175] "ENSG00000230258.6"  "ENSG00000259252.1"  "ENSG00000287457.1" 
## [178] "ENSG00000250576.1"  "ENSG00000288040.1"  "ENSG00000254290.1" 
## [181] "ENSG00000226087.2"  "ENSG00000286635.1"  "ENSG00000278973.1" 
## [184] "ENSG00000233981.1"  "ENSG00000262869.1"  "ENSG00000201042.1" 
## [187] "ENSG00000227527.2"  "ENSG00000228361.2"  "ENSG00000254559.1" 
## [190] "ENSG00000286502.1"  "ENSG00000284705.1"  "ENSG00000201549.1" 
## [193] "ENSG00000243491.1"  "ENSG00000237720.2"  "ENSG00000236800.1" 
## [196] "ENSG00000224195.1"  "ENSG00000283782.2"  "ENSG00000237987.1" 
## [199] "ENSG00000286607.1"  "ENSG00000237617.1"  "ENSG00000276476.3" 
## [202] "ENSG00000260185.1"  "ENSG00000248359.2"  "ENSG00000225315.2" 
## [205] "ENSG00000287162.1"  "ENSG00000234378.1"  "ENSG00000288605.1" 
## [208] "ENSG00000287828.1"  "ENSG00000286498.1"  "ENSG00000279526.1" 
## [211] "ENSG00000254789.3"  "ENSG00000286471.1"  "ENSG00000285707.1" 
## [214] "ENSG00000232716.2"  "ENSG00000279087.1"  "ENSG00000280270.1" 
## [217] "ENSG00000287822.1"  "ENSG00000258560.1"  "ENSG00000286641.1" 
## [220] "ENSG00000262445.3"  "ENSG00000225156.2"  "ENSG00000250007.7" 
## [223] "ENSG00000260454.2"  "ENSG00000272097.2"  "ENSG00000242531.1" 
## [226] "ENSG00000229321.2"  "ENSG00000258935.1"  "ENSG00000235840.1" 
## [229] "ENSG00000248645.1"  "ENSG00000261555.1"  "ENSG00000229751.1" 
## [232] "ENSG00000261702.2"  "ENSG00000284188.2"  "ENSG00000249199.2" 
## [235] "ENSG00000221461.2"  "ENSG00000251270.1"  "ENSG00000261189.1" 
## [238] "ENSG00000256452.1"  "ENSG00000231024.1"  "ENSG00000229673.1" 
## [241] "ENSG00000249727.2"  "ENSG00000231078.1"  "ENSG00000271114.1" 
## [244] "ENSG00000226706.1"  "ENSG00000283511.1"  "ENSG00000228683.1" 
## [247] "ENSG00000286320.1"  "ENSG00000259579.1"  "ENSG00000226927.1" 
## [250] "ENSG00000268051.1"  "ENSG00000286356.1"  "ENSG00000277825.1" 
## [253] "ENSG00000277319.1"  "ENSG00000217514.1"  "ENSG00000273209.1" 
## [256] "ENSG00000233633.2"  "ENSG00000212589.1"  "ENSG00000258598.1" 
## [259] "ENSG00000263723.1"  "ENSG00000288079.1"  "ENSG00000287560.1" 
## [262] "ENSG00000279409.1"  "ENSG00000267245.1"  "ENSG00000235023.1" 
## [265] "ENSG00000286963.1"  "ENSG00000201584.1"  "ENSG00000248521.1" 
## [268] "ENSG00000235563.1"  "ENSG00000285804.2"  "ENSG00000236936.1" 
## [271] "ENSG00000274379.1"  "ENSG00000283128.2"  "ENSG00000260004.1" 
## [274] "ENSG00000280217.1"  "ENSG00000262966.2"  "ENSG00000228345.2" 
## [277] "ENSG00000201541.1"  "ENSG00000285534.1"  "ENSG00000260640.1" 
## [280] "ENSG00000287882.1"  "ENSG00000286119.1"  "ENSG00000287145.1" 
## [283] "ENSG00000268080.2"  "ENSG00000231951.1"  "ENSG00000280354.1" 
## [286] "ENSG00000225632.1"  "ENSG00000288639.1"  "ENSG00000259420.5" 
## [289] "ENSG00000261472.1"  "ENSG00000286390.1"  "ENSG00000212580.1" 
## [292] "ENSG00000280222.1"  "ENSG00000246792.3"  "ENSG00000267658.1" 
## [295] "ENSG00000262810.1"  "ENSG00000235240.1"  "ENSG00000226096.1" 
## [298] "ENSG00000251445.1"  "ENSG00000249930.1"  "ENSG00000237422.2" 
## [301] "ENSG00000235096.2"  "ENSG00000233096.1"  "ENSG00000286526.1" 
## [304] "ENSG00000231251.1"  "ENSG00000261723.1"  "ENSG00000234967.2" 
## [307] "ENSG00000254458.1"  "ENSG00000244260.2"  "ENSG00000249213.1" 
## [310] "ENSG00000227920.3"  "ENSG00000229618.3"  "ENSG00000235886.1" 
## [313] "ENSG00000271360.1"  "ENSG00000272922.1"  "ENSG00000224099.2" 
## [316] "ENSG00000234017.1"  "ENSG00000261177.1"  "ENSG00000261095.1" 
## [319] "ENSG00000270531.1"  "ENSG00000230385.1"  "ENSG00000272715.1" 
## [322] "ENSG00000202318.1"  "ENSG00000285775.1"  "ENSG00000236856.1" 
## [325] "ENSG00000224794.2"  "ENSG00000232072.1"  "ENSG00000224731.1" 
## [328] "ENSG00000250192.1"  "ENSG00000263033.2"  "ENSG00000199196.1" 
## [331] "ENSG00000285367.1"  "ENSG00000284652.1"  "ENSG00000230404.1" 
## [334] "ENSG00000286505.1"  "ENSG00000224815.3"  "ENSG00000254519.5" 
## [337] "ENSG00000269919.1"  "ENSG00000254002.2"  "ENSG00000227775.3" 
## [340] "ENSG00000264750.1"  "ENSG00000223027.1"  "ENSG00000248702.1" 
## [343] "ENSG00000287923.1"  "ENSG00000248753.1"  "ENSG00000280392.1" 
## [346] "ENSG00000226965.2"  "ENSG00000255446.1"  "ENSG00000288615.1" 
## [349] "ENSG00000220739.2"  "ENSG00000249738.10" "ENSG00000259200.2" 
## [352] "ENSG00000286757.2"  "ENSG00000203392.3"  "ENSG00000253051.1" 
## [355] "ENSG00000230233.1"  "ENSG00000226398.1"  "ENSG00000230647.2" 
## [358] "ENSG00000278071.1"  "ENSG00000232057.1"  "ENSG00000260788.6" 
## [361] "ENSG00000272215.1"  "ENSG00000279778.1"  "ENSG00000286834.1" 
## [364] "ENSG00000262116.1"  "ENSG00000186244.7"  "ENSG00000268926.3" 
## [367] "ENSG00000277831.1"  "ENSG00000258399.7"  "ENSG00000212590.1" 
## [370] "ENSG00000253608.2"  "ENSG00000197099.8"  "ENSG00000267455.1" 
## [373] "ENSG00000233969.2"  "ENSG00000213076.3"  "ENSG00000259202.1" 
## [376] "ENSG00000285623.1"  "ENSG00000270174.1"  "ENSG00000248128.1" 
## [379] "ENSG00000271384.1"  "ENSG00000287903.1"  "ENSG00000256221.1" 
## [382] "ENSG00000229607.1"  "ENSG00000255021.1"  "ENSG00000256708.1" 
## [385] "ENSG00000271709.1"  "ENSG00000271795.1"  "ENSG00000230698.1" 
## [388] "ENSG00000249642.2"  "ENSG00000287726.1"  "ENSG00000228873.1" 
## [391] "ENSG00000267579.1"  "ENSG00000242444.3"  "ENSG00000251010.1" 
## [394] "ENSG00000261248.1"  "ENSG00000263499.1"  "ENSG00000237920.2" 
## [397] "ENSG00000280047.1"  "ENSG00000286276.1"  "ENSG00000255605.1" 
## [400] "ENSG00000243116.1"  "ENSG00000224593.1"  "ENSG00000270927.1" 
## [403] "ENSG00000241354.2"  "ENSG00000277938.1"  "ENSG00000213065.2" 
## [406] "ENSG00000255545.8"  "ENSG00000228176.1"  "ENSG00000260304.1" 
## [409] "ENSG00000259868.3"  "ENSG00000226571.2"  "ENSG00000259989.1" 
## [412] "ENSG00000237844.2"  "ENSG00000254321.2"  "ENSG00000233834.6" 
## [415] "ENSG00000266101.1"  "ENSG00000254538.1"  "ENSG00000271269.1" 
## [418] "ENSG00000203446.2"  "ENSG00000224414.1"  "ENSG00000231057.4" 
## [421] "ENSG00000270591.1"  "ENSG00000279086.1"  "ENSG00000224322.2" 
## [424] "ENSG00000283078.1"  "ENSG00000226957.1"  "ENSG00000258760.1" 
## [427] "ENSG00000201393.1"  "ENSG00000260404.3"  "ENSG00000215450.2" 
## [430] "ENSG00000264754.1"  "ENSG00000207210.1"  "ENSG00000287220.1" 
## [433] "ENSG00000199751.1"  "ENSG00000270120.1"  "ENSG00000287175.1" 
## [436] "ENSG00000238503.1"  "ENSG00000236389.1"  "ENSG00000286101.1" 
## [439] "ENSG00000261347.1"  "ENSG00000234929.2"  "ENSG00000199200.2" 
## [442] "ENSG00000287975.1"  "ENSG00000275880.1"  "ENSG00000226266.6" 
## [445] "ENSG00000248733.1"  "ENSG00000286948.1"  "ENSG00000276332.1" 
## [448] "ENSG00000254001.5"  "ENSG00000223727.6"  "ENSG00000250961.2" 
## [451] "ENSG00000234223.2"  "ENSG00000280015.1"  "ENSG00000228108.1" 
## [454] "ENSG00000257726.1"  "ENSG00000236466.2"  "ENSG00000287387.1" 
## [457] "ENSG00000283579.1"  "ENSG00000236842.1"  "ENSG00000234860.2" 
## [460] "ENSG00000260171.1"  "ENSG00000258620.2"  "ENSG00000253354.2" 
## [463] "ENSG00000223967.1"  "ENSG00000271475.1"  "ENSG00000257258.2"
table(genes$`Higher methylation in hPGCLCs`%in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##     9    65
DMG_uphPGCLCsvshiPSCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hPGCLCs`, "entrez_gene_id"]
DMG_uphPGCLCsvshiPSCs <- na.omit(DMG_uphPGCLCsvshiPSCs[!DMG_uphPGCLCsvshiPSCs %in% ""])
DMG_downhPGCLCsvshiPSCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hiPSCs`, "entrez_gene_id"]
DMG_downhPGCLCsvshiPSCs <- na.omit(DMG_downhPGCLCsvshiPSCs[!DMG_downhPGCLCsvshiPSCs %in% ""])
any(duplicated(DMG_uphPGCLCsvshiPSCs))
## [1] FALSE
any(duplicated(DMG_downhPGCLCsvshiPSCs))
## [1] FALSE
genes$`Higher methylation in hPGCLCs` <- DMG_uphPGCLCsvshiPSCs
genes$`Higher methylation in hiPSCs` <- DMG_downhPGCLCsvshiPSCs
compKEGG <- compareCluster(geneCluster   = genes,
                         fun           = "enrichKEGG",
                         organism="hsa",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         universe = GeneUniverse_entrezid)

There are no significant terms (They have Padj>0.05)

dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")

compGO <- compareCluster(geneCluster   = genes,
                         fun           = "enrichGO",
                         keyType = "ENTREZID",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         OrgDb='org.Hs.eg.db', 
                         universe=GeneUniverse_entrezid,
                         ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")

A new DMRs_annotated object is created in which annotation from ChIPseeker is added.

mcols(DMRs_annotated$hiPSCsvshPGCLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hiPSCsvshPGCLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hiPSCsvshPGCLCs)$ChipSeekerGeneId <- NA

m <- findOverlaps(DMRs_annotated$hiPSCsvshPGCLCs, peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "geneId"]

m <- findOverlaps(DMRs_annotated$hiPSCsvshPGCLCs, peakAnnoList[["Higher methylation in hiPSCs"]]@anno)
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "geneId"]

Adding entrez gene id, gene symbol and gene information

DMRs_annotated$hiPSCsvshPGCLCs <- as.data.frame(DMRs_annotated$hiPSCsvshPGCLCs)
DMRs_annotated$hiPSCsvshPGCLCs <- dplyr::left_join(DMRs_annotated$hiPSCsvshPGCLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) 
DMRs_annotated$hiPSCsvshPGCLCs[!is.na(DMRs_annotated$hiPSCsvshPGCLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
    datatable(class = 'hover', rownames = FALSE, caption="DMRs - hiPSCsvshPGCLCs", extension='Buttons', escape = FALSE, 
              options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE, 
                             buttons=list(c('csv', 'excel'))))

4.4 iMeLCs vs hiPSCs

4.4.1 Genetic annotation and visualization with ChIPseeker

DMRs are now annotated to their associated genes and genomic regions using the annotatePeaks function of the ChIPseekerR package, using a TxDb object. The GenomicFeatures package uses TxDb objects to store transcript metadata. This class maps the 5’ and 3’ untranslated regions (UTRs), protein coding sequences (CDSs) and exons for a set of mRNA transcripts to their associated genome. All TxDb objects are backed by a SQLite database that manages genomic locations and the relationships between pre-processed mRNA transcripts, exons, protein coding sequences, and their related gene identifiers.

TxDb created by us

There are no DMRs where the methylation is higher in iMeLCs with respect to hiPSCs.

peakAnnoList <- annotatePeak(DMRs_GRanges$hiPSCsvsiMeLCs, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList)

ChIPseeker::plotDistToTSS(peakAnnoList)

Given a list of gene set, compareCluster function will compute profiles of each gene cluster.

genes = unique(as.data.frame(peakAnnoList)$geneId)
names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes))
## [1] FALSE

There are no duplicated ensembl ids

table(genes %in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##    21    44
genes[!genes%in% Genes_df$ensembl_gene_id_version]
##                <NA>                <NA>                <NA>                <NA> 
## "ENSG00000272849.1" "ENSG00000259362.2" "ENSG00000228613.1" "ENSG00000262884.1" 
##                <NA>                <NA>                <NA>                <NA> 
## "ENSG00000271218.1" "ENSG00000284691.1" "ENSG00000238031.1" "ENSG00000203647.2" 
##                <NA>                <NA>                <NA>                <NA> 
## "ENSG00000249526.1" "ENSG00000282952.1" "ENSG00000201026.1" "ENSG00000255585.3" 
##                <NA>                <NA>                <NA>                <NA> 
## "ENSG00000267366.1" "ENSG00000236426.5" "ENSG00000285707.1" "ENSG00000229618.3" 
##                <NA>                <NA>                <NA>                <NA> 
## "ENSG00000246792.3" "ENSG00000230003.2" "ENSG00000286805.1" "ENSG00000229673.1" 
##                <NA> 
## "ENSG00000228361.2"
DMG_downiMeLCsvshiPSCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes, "entrez_gene_id"]
DMG_downiMeLCsvshiPSCs <- na.omit(DMG_downiMeLCsvshiPSCs[!DMG_downiMeLCsvshiPSCs %in% ""])
any(duplicated(DMG_downiMeLCsvshiPSCs))
## [1] FALSE
genes <- DMG_downiMeLCsvshiPSCs
# compKEGG <- compareCluster(geneCluster   = genes,
#                          fun           = "enrichKEGG",
#                          organism="hsa",
#                          pvalueCutoff  = 0.05,
#                          pAdjustMethod = "BH", 
#                          universe = GeneUniverse_entrezid)
#dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis") #It fails because too few genes.
# compGO <- compareCluster(geneCluster   = genes,
#                          fun           = "enrichGO",
#                          keyType = "ENTREZID",
#                          pvalueCutoff  = 0.05,
#                          pAdjustMethod = "BH", 
#                          OrgDb='org.Hs.eg.db', 
#                          universe=GeneUniverse_entrezid,
#                          ont = "ALL")
#dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")

A new DMRs_annotated object is created in which annotation from ChIPseeker is added.

mcols(DMRs_annotated$hiPSCsvsiMeLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hiPSCsvsiMeLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hiPSCsvsiMeLCs)$ChipSeekerGeneId <- NA

m <- findOverlaps(DMRs_annotated$hiPSCsvsiMeLCs, peakAnnoList@anno)
mcols(DMRs_annotated$hiPSCsvsiMeLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hiPSCsvsiMeLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hiPSCsvsiMeLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList@anno)[subjectHits(m), "geneId"]

Adding entrez gene id, gene symbol and gene information

DMRs_annotated$hiPSCsvsiMeLCs <- as.data.frame(DMRs_annotated$hiPSCsvsiMeLCs)
DMRs_annotated$hiPSCsvsiMeLCs <- dplyr::left_join(DMRs_annotated$hiPSCsvsiMeLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) 
DMRs_annotated$hiPSCsvsiMeLCs[!is.na(DMRs_annotated$hiPSCsvsiMeLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
    datatable(class = 'hover', rownames = FALSE, caption="DMRs - hiPSCsvsiMeLCs", extension='Buttons', escape = FALSE, 
              options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE, 
                             buttons=list(c('csv', 'excel'))))

4.5 hPGCLCs vs iMeLCs

4.5.1 Genetic annotation and visualization with ChIPseeker

DMRs are now annotated to their associated genes and genomic regions using the annotatePeaks function of the ChIPseekerR package, using a TxDb object. The GenomicFeatures package uses TxDb objects to store transcript metadata. This class maps the 5’ and 3’ untranslated regions (UTRs), protein coding sequences (CDSs) and exons for a set of mRNA transcripts to their associated genome. All TxDb objects are backed by a SQLite database that manages genomic locations and the relationships between pre-processed mRNA transcripts, exons, protein coding sequences, and their related gene identifiers.

TxDb created by us

peakAnnoList <- lapply(list("Higher methylation in hPGCLCs" = DMRs_GRanges$iMeLCsvshPGCLCs[DMRs_GRanges$iMeLCsvshPGCLCs$diff.Methy<0,], "Higher methylation in iMeLCs" = DMRs_GRanges$iMeLCsvshPGCLCs[DMRs_GRanges$iMeLCsvshPGCLCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList)

ChIPseeker::plotDistToTSS(peakAnnoList)

Given a list of gene set, compareCluster function will compute profiles of each gene cluster.

genes = lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hPGCLCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in iMeLCs`))
## [1] FALSE

There are no duplicated ensembl ids

table(genes$`Higher methylation in iMeLCs` %in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##   284   960
genes$`Higher methylation in iMeLCs` [!genes$`Higher methylation in iMeLCs` %in% Genes_df$ensembl_gene_id_version]
##   [1] "ENSG00000225339.4" "ENSG00000224079.1" "ENSG00000275178.1"
##   [4] "ENSG00000288549.1" "ENSG00000275088.1" "ENSG00000253395.1"
##   [7] "ENSG00000233052.1" "ENSG00000267665.2" "ENSG00000278716.1"
##  [10] "ENSG00000282718.1" "ENSG00000286295.1" "ENSG00000280326.1"
##  [13] "ENSG00000237326.1" "ENSG00000285163.1" "ENSG00000285855.1"
##  [16] "ENSG00000263017.1" "ENSG00000230676.1" "ENSG00000286535.1"
##  [19] "ENSG00000287823.1" "ENSG00000279412.1" "ENSG00000254755.1"
##  [22] "ENSG00000237126.8" "ENSG00000287428.1" "ENSG00000215381.3"
##  [25] "ENSG00000276166.1" "ENSG00000267036.2" "ENSG00000234275.4"
##  [28] "ENSG00000241449.6" "ENSG00000272988.1" "ENSG00000231113.2"
##  [31] "ENSG00000262652.1" "ENSG00000233891.8" "ENSG00000249808.3"
##  [34] "ENSG00000230526.1" "ENSG00000176515.1" "ENSG00000279065.1"
##  [37] "ENSG00000256564.1" "ENSG00000237372.4" "ENSG00000234793.1"
##  [40] "ENSG00000259341.2" "ENSG00000279931.1" "ENSG00000255845.1"
##  [43] "ENSG00000199949.2" "ENSG00000286240.1" "ENSG00000287787.1"
##  [46] "ENSG00000237371.1" "ENSG00000267090.1" "ENSG00000279711.1"
##  [49] "ENSG00000286364.1" "ENSG00000245688.1" "ENSG00000257883.1"
##  [52] "ENSG00000270911.1" "ENSG00000268750.7" "ENSG00000287338.1"
##  [55] "ENSG00000250577.1" "ENSG00000285773.1" "ENSG00000228154.3"
##  [58] "ENSG00000226409.3" "ENSG00000230163.1" "ENSG00000287161.1"
##  [61] "ENSG00000272849.1" "ENSG00000280444.1" "ENSG00000286333.1"
##  [64] "ENSG00000253543.1" "ENSG00000279705.1" "ENSG00000287046.1"
##  [67] "ENSG00000280004.1" "ENSG00000270972.1" "ENSG00000230990.2"
##  [70] "ENSG00000238010.1" "ENSG00000261938.1" "ENSG00000261976.2"
##  [73] "ENSG00000269667.2" "ENSG00000286286.1" "ENSG00000234428.2"
##  [76] "ENSG00000254933.1" "ENSG00000253445.1" "ENSG00000278330.1"
##  [79] "ENSG00000253307.1" "ENSG00000258698.1" "ENSG00000216775.3"
##  [82] "ENSG00000203739.4" "ENSG00000286610.1" "ENSG00000249236.2"
##  [85] "ENSG00000253140.2" "ENSG00000231539.1" "ENSG00000256481.1"
##  [88] "ENSG00000250230.3" "ENSG00000226047.1" "ENSG00000227527.2"
##  [91] "ENSG00000235994.4" "ENSG00000273160.1" "ENSG00000254777.5"
##  [94] "ENSG00000287073.1" "ENSG00000286607.1" "ENSG00000229628.1"
##  [97] "ENSG00000279261.2" "ENSG00000248962.1" "ENSG00000287235.1"
## [100] "ENSG00000287089.1" "ENSG00000253397.1" "ENSG00000236676.1"
## [103] "ENSG00000219133.2" "ENSG00000254111.3" "ENSG00000260185.1"
## [106] "ENSG00000259920.1" "ENSG00000279076.1" "ENSG00000225028.1"
## [109] "ENSG00000225315.2" "ENSG00000254559.1" "ENSG00000232408.1"
## [112] "ENSG00000254290.1" "ENSG00000260473.2" "ENSG00000250576.1"
## [115] "ENSG00000262445.3" "ENSG00000272097.2" "ENSG00000201042.1"
## [118] "ENSG00000238031.1" "ENSG00000248645.1" "ENSG00000287756.1"
## [121] "ENSG00000262869.1" "ENSG00000256452.1" "ENSG00000261606.5"
## [124] "ENSG00000280270.1" "ENSG00000258560.1" "ENSG00000250007.7"
## [127] "ENSG00000231876.8" "ENSG00000275155.1" "ENSG00000272922.1"
## [130] "ENSG00000232716.2" "ENSG00000267245.1" "ENSG00000242531.1"
## [133] "ENSG00000279526.1" "ENSG00000221461.2" "ENSG00000258935.1"
## [136] "ENSG00000235538.3" "ENSG00000251270.1" "ENSG00000226542.1"
## [139] "ENSG00000254205.1" "ENSG00000280354.1" "ENSG00000286635.1"
## [142] "ENSG00000201549.1" "ENSG00000225187.1" "ENSG00000287822.1"
## [145] "ENSG00000249727.2" "ENSG00000256955.2" "ENSG00000259579.1"
## [148] "ENSG00000212589.1" "ENSG00000226087.2" "ENSG00000226868.1"
## [151] "ENSG00000225092.2" "ENSG00000250338.1" "ENSG00000271114.1"
## [154] "ENSG00000217514.1" "ENSG00000233633.2" "ENSG00000286864.1"
## [157] "ENSG00000261702.2" "ENSG00000261095.1" "ENSG00000229321.2"
## [160] "ENSG00000234949.2" "ENSG00000277319.1" "ENSG00000231482.3"
## [163] "ENSG00000283128.2" "ENSG00000254789.3" "ENSG00000226927.1"
## [166] "ENSG00000286641.1" "ENSG00000243491.1" "ENSG00000263723.1"
## [169] "ENSG00000235840.1" "ENSG00000269919.1" "ENSG00000280217.1"
## [172] "ENSG00000201196.1" "ENSG00000226706.1" "ENSG00000201584.1"
## [175] "ENSG00000274379.1" "ENSG00000287882.1" "ENSG00000276476.3"
## [178] "ENSG00000279409.1" "ENSG00000261723.1" "ENSG00000202318.1"
## [181] "ENSG00000201541.1" "ENSG00000234378.1" "ENSG00000260454.2"
## [184] "ENSG00000287145.1" "ENSG00000262810.1" "ENSG00000248702.1"
## [187] "ENSG00000235023.1" "ENSG00000260788.6" "ENSG00000227920.3"
## [190] "ENSG00000226096.1" "ENSG00000227775.3" "ENSG00000235096.2"
## [193] "ENSG00000254364.1" "ENSG00000224545.1" "ENSG00000254458.1"
## [196] "ENSG00000272715.1" "ENSG00000272485.1" "ENSG00000259200.2"
## [199] "ENSG00000203392.3" "ENSG00000261177.1" "ENSG00000280222.1"
## [202] "ENSG00000261472.1" "ENSG00000288079.1" "ENSG00000278071.1"
## [205] "ENSG00000288639.1" "ENSG00000248359.2" "ENSG00000259961.1"
## [208] "ENSG00000263508.6" "ENSG00000288615.1" "ENSG00000285623.1"
## [211] "ENSG00000224731.1" "ENSG00000236800.1" "ENSG00000253051.1"
## [214] "ENSG00000241354.2" "ENSG00000273209.1" "ENSG00000230404.1"
## [217] "ENSG00000228345.2" "ENSG00000230233.1" "ENSG00000286356.1"
## [220] "ENSG00000186244.7" "ENSG00000235886.1" "ENSG00000233981.1"
## [223] "ENSG00000212580.1" "ENSG00000197099.8" "ENSG00000284652.1"
## [226] "ENSG00000248128.1" "ENSG00000285367.1" "ENSG00000270174.1"
## [229] "ENSG00000253608.2" "ENSG00000267658.1" "ENSG00000248521.1"
## [232] "ENSG00000271269.1" "ENSG00000249930.1" "ENSG00000258399.7"
## [235] "ENSG00000272215.1" "ENSG00000268926.3" "ENSG00000232072.1"
## [238] "ENSG00000230647.2" "ENSG00000234017.1" "ENSG00000255545.8"
## [241] "ENSG00000255021.1" "ENSG00000285775.1" "ENSG00000228613.1"
## [244] "ENSG00000229607.1" "ENSG00000267579.1" "ENSG00000260004.1"
## [247] "ENSG00000286276.1" "ENSG00000226169.1" "ENSG00000213076.3"
## [250] "ENSG00000199751.1" "ENSG00000250409.1" "ENSG00000280015.1"
## [253] "ENSG00000259252.1" "ENSG00000266101.1" "ENSG00000254321.2"
## [256] "ENSG00000207210.1" "ENSG00000258760.1" "ENSG00000237920.2"
## [259] "ENSG00000287903.1" "ENSG00000248733.1" "ENSG00000199200.2"
## [262] "ENSG00000256708.1" "ENSG00000270120.1" "ENSG00000226398.1"
## [265] "ENSG00000254538.1" "ENSG00000254607.2" "ENSG00000270591.1"
## [268] "ENSG00000276332.1" "ENSG00000285534.1" "ENSG00000226965.2"
## [271] "ENSG00000277831.1" "ENSG00000236466.2" "ENSG00000238503.1"
## [274] "ENSG00000270927.1" "ENSG00000271795.1" "ENSG00000255605.1"
## [277] "ENSG00000242444.3" "ENSG00000226266.6" "ENSG00000203446.2"
## [280] "ENSG00000280047.1" "ENSG00000277050.1" "ENSG00000286948.1"
## [283] "ENSG00000234860.2" "ENSG00000223727.6"
table(genes$`Higher methylation in hPGCLCs`%in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##     8    63
DMG_uphPGCLCsvsiMeLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hPGCLCs`, "entrez_gene_id"]
DMG_uphPGCLCsvsiMeLCs <- na.omit(DMG_uphPGCLCsvsiMeLCs[!DMG_uphPGCLCsvsiMeLCs %in% ""])
DMG_downhPGCLCsvsiMeLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in iMeLCs`, "entrez_gene_id"]
DMG_downhPGCLCsvsiMeLCs <- na.omit(DMG_downhPGCLCsvsiMeLCs[!DMG_downhPGCLCsvsiMeLCs %in% ""])
any(duplicated(DMG_uphPGCLCsvsiMeLCs))
## [1] FALSE
any(duplicated(DMG_downhPGCLCsvsiMeLCs))
## [1] FALSE
genes$`Higher methylation in hPGCLCs` <- DMG_uphPGCLCsvsiMeLCs
genes$`Higher methylation in iMeLCs` <- DMG_downhPGCLCsvsiMeLCs
compKEGG <- compareCluster(geneCluster   = genes,
                         fun           = "enrichKEGG",
                         organism="hsa",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         universe = GeneUniverse_entrezid)
## Warning in compareCluster(geneCluster = genes, fun = "enrichKEGG", organism =
## "hsa", : No enrichment found in any of gene cluster, please check your input...

There are no significant terms (They have Padj>0.05)

if (!is.null(compKEGG)) {
  dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
  }
compGO <- compareCluster(geneCluster   = genes,
                         fun           = "enrichGO",
                         keyType = "ENTREZID",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         OrgDb='org.Hs.eg.db', 
                         universe=GeneUniverse_entrezid,
                         ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")

A new DMRs_annotated object is created in which annotation from ChIPseeker is added.

mcols(DMRs_annotated$iMeLCsvshPGCLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$iMeLCsvshPGCLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$iMeLCsvshPGCLCs)$ChipSeekerGeneId <- NA

m <- findOverlaps(DMRs_annotated$iMeLCsvshPGCLCs, peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "geneId"]

m <- findOverlaps(DMRs_annotated$iMeLCsvshPGCLCs, peakAnnoList[["Higher methylation in iMeLCs"]]@anno)
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "geneId"]

Adding entrez gene id, gene symbol and gene information

DMRs_annotated$iMeLCsvshPGCLCs <- as.data.frame(DMRs_annotated$iMeLCsvshPGCLCs)
DMRs_annotated$iMeLCsvshPGCLCs <- dplyr::left_join(DMRs_annotated$iMeLCsvshPGCLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) 
DMRs_annotated$iMeLCsvshPGCLCs[!is.na(DMRs_annotated$iMeLCsvshPGCLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
    datatable(class = 'hover', rownames = FALSE, caption="DMRs - iMeLCsvshPGCLCs", extension='Buttons', escape = FALSE, 
              options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE, 
                             buttons=list(c('csv', 'excel'))))

4.6 iMeLCs VS hEGCLCs

4.6.1 Genetic annotation and visualization with ChIPseeker

DMRs are now annotated to their associated genes and genomic regions using the annotatePeaks function of the ChIPseekerR package, using a TxDb object. The GenomicFeatures package uses TxDb objects to store transcript metadata. This class maps the 5’ and 3’ untranslated regions (UTRs), protein coding sequences (CDSs) and exons for a set of mRNA transcripts to their associated genome. All TxDb objects are backed by a SQLite database that manages genomic locations and the relationships between pre-processed mRNA transcripts, exons, protein coding sequences, and their related gene identifiers.

TxDb created by us

  peakAnnoList <- lapply(list("Higher methylation in iMeLCs" = DMRs_GRanges$hEGCLCsvsiMeLCs[DMRs_GRanges$hEGCLCsvsiMeLCs$diff.Methy<0,], "Higher methylation in hEGCLCs" = DMRs_GRanges$hEGCLCsvsiMeLCs[DMRs_GRanges$hEGCLCsvsiMeLCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList)

ChIPseeker::plotDistToTSS(peakAnnoList)

Given a list of gene set, compareCluster function will compute profiles of each gene cluster.

genes = lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hEGCLCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in iMeLCs`))
## [1] FALSE

There are no duplicated ensembl ids

table(genes$`Higher methylation in iMeLCs` %in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##   162   528
genes$`Higher methylation in iMeLCs` [!genes$`Higher methylation in iMeLCs` %in% Genes_df$ensembl_gene_id_version]
##   [1] "ENSG00000274219.1"  "ENSG00000261842.1"  "ENSG00000240449.1" 
##   [4] "ENSG00000278000.1"  "ENSG00000279031.1"  "ENSG00000280353.1" 
##   [7] "ENSG00000251196.1"  "ENSG00000279611.1"  "ENSG00000270889.1" 
##  [10] "ENSG00000241014.2"  "ENSG00000274177.1"  "ENSG00000286717.1" 
##  [13] "ENSG00000285833.1"  "ENSG00000233820.2"  "ENSG00000267713.1" 
##  [16] "ENSG00000221093.1"  "ENSG00000269388.1"  "ENSG00000259607.1" 
##  [19] "ENSG00000287002.1"  "ENSG00000287570.1"  "ENSG00000261675.1" 
##  [22] "ENSG00000283075.1"  "ENSG00000250900.6"  "ENSG00000235538.3" 
##  [25] "ENSG00000230285.1"  "ENSG00000275201.1"  "ENSG00000229402.1" 
##  [28] "ENSG00000259789.2"  "ENSG00000240882.1"  "ENSG00000237661.1" 
##  [31] "ENSG00000275833.1"  "ENSG00000253942.1"  "ENSG00000217702.3" 
##  [34] "ENSG00000103200.6"  "ENSG00000233052.1"  "ENSG00000251250.3" 
##  [37] "ENSG00000230699.2"  "ENSG00000274977.1"  "ENSG00000225489.7" 
##  [40] "ENSG00000267202.5"  "ENSG00000260509.2"  "ENSG00000268230.5" 
##  [43] "ENSG00000255535.2"  "ENSG00000287060.1"  "ENSG00000285712.1" 
##  [46] "ENSG00000259983.1"  "ENSG00000267341.1"  "ENSG00000243818.5" 
##  [49] "ENSG00000233332.1"  "ENSG00000272915.1"  "ENSG00000285329.1" 
##  [52] "ENSG00000278722.1"  "ENSG00000269289.6"  "ENSG00000267786.1" 
##  [55] "ENSG00000280282.1"  "ENSG00000230537.2"  "ENSG00000267353.1" 
##  [58] "ENSG00000261460.1"  "ENSG00000230936.1"  "ENSG00000287742.1" 
##  [61] "ENSG00000277878.1"  "ENSG00000268886.1"  "ENSG00000288571.1" 
##  [64] "ENSG00000225868.7"  "ENSG00000229196.4"  "ENSG00000238277.1" 
##  [67] "ENSG00000261651.1"  "ENSG00000249149.2"  "ENSG00000251211.1" 
##  [70] "ENSG00000287486.1"  "ENSG00000250105.1"  "ENSG00000229679.1" 
##  [73] "ENSG00000261090.2"  "ENSG00000261720.1"  "ENSG00000234104.1" 
##  [76] "ENSG00000285829.1"  "ENSG00000276687.1"  "ENSG00000287744.1" 
##  [79] "ENSG00000226659.1"  "ENSG00000204850.4"  "ENSG00000259394.2" 
##  [82] "ENSG00000213150.2"  "ENSG00000266283.1"  "ENSG00000267040.6" 
##  [85] "ENSG00000286697.1"  "ENSG00000255240.6"  "ENSG00000270625.1" 
##  [88] "ENSG00000250971.3"  "ENSG00000250523.1"  "ENSG00000235939.1" 
##  [91] "ENSG00000226426.1"  "ENSG00000230033.1"  "ENSG00000256695.2" 
##  [94] "ENSG00000225106.1"  "ENSG00000259807.2"  "ENSG00000267648.1" 
##  [97] "ENSG00000276633.1"  "ENSG00000214210.4"  "ENSG00000230309.1" 
## [100] "ENSG00000286407.1"  "ENSG00000288561.1"  "ENSG00000272688.1" 
## [103] "ENSG00000267558.2"  "ENSG00000254877.1"  "ENSG00000272203.1" 
## [106] "ENSG00000280318.1"  "ENSG00000261076.1"  "ENSG00000197099.8" 
## [109] "ENSG00000286046.1"  "ENSG00000229312.2"  "ENSG00000285228.1" 
## [112] "ENSG00000279578.1"  "ENSG00000276476.3"  "ENSG00000233247.3" 
## [115] "ENSG00000224352.1"  "ENSG00000146722.11" "ENSG00000259520.6" 
## [118] "ENSG00000205584.6"  "ENSG00000174977.8"  "ENSG00000285741.1" 
## [121] "ENSG00000256029.6"  "ENSG00000270891.1"  "ENSG00000229299.2" 
## [124] "ENSG00000283633.1"  "ENSG00000224765.1"  "ENSG00000274397.1" 
## [127] "ENSG00000202459.1"  "ENSG00000262668.2"  "ENSG00000274660.1" 
## [130] "ENSG00000237947.1"  "ENSG00000258385.1"  "ENSG00000257512.1" 
## [133] "ENSG00000230964.1"  "ENSG00000234535.1"  "ENSG00000268677.1" 
## [136] "ENSG00000261213.1"  "ENSG00000259010.1"  "ENSG00000270131.1" 
## [139] "ENSG00000228532.4"  "ENSG00000286995.1"  "ENSG00000275649.1" 
## [142] "ENSG00000284734.1"  "ENSG00000256673.1"  "ENSG00000255182.2" 
## [145] "ENSG00000224273.2"  "ENSG00000255801.1"  "ENSG00000188525.4" 
## [148] "ENSG00000237525.7"  "ENSG00000285553.1"  "ENSG00000232328.3" 
## [151] "ENSG00000253028.1"  "ENSG00000238110.1"  "ENSG00000253633.2" 
## [154] "ENSG00000257657.3"  "ENSG00000258422.5"  "ENSG00000255910.2" 
## [157] "ENSG00000207176.1"  "ENSG00000264149.1"  "ENSG00000253585.1" 
## [160] "ENSG00000278078.1"  "ENSG00000288253.1"  "ENSG00000227459.1"
table(genes$`Higher methylation in hEGCLCs`%in% Genes_df$ensembl_gene_id_version)
## 
## FALSE  TRUE 
##    27    61
DMG_uphEGCLCsvsiMeLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hEGCLCs`, "entrez_gene_id"]
DMG_uphEGCLCsvsiMeLCs <- na.omit(DMG_uphEGCLCsvsiMeLCs[!DMG_uphEGCLCsvsiMeLCs %in% ""])
DMG_downhEGCLCsvsiMeLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in iMeLCs`, "entrez_gene_id"]
DMG_downhEGCLCsvsiMeLCs <- na.omit(DMG_downhEGCLCsvsiMeLCs[!DMG_downhEGCLCsvsiMeLCs %in% ""])
any(duplicated(DMG_uphEGCLCsvsiMeLCs))
## [1] FALSE
any(duplicated(DMG_downhEGCLCsvsiMeLCs))
## [1] FALSE
genes$`Higher methylation in hEGCLCs` <- DMG_uphEGCLCsvsiMeLCs
genes$`Higher methylation in iMeLCs` <- DMG_downhEGCLCsvsiMeLCs
compKEGG <- compareCluster(geneCluster   = genes,
                         fun           = "enrichKEGG",
                         organism="hsa",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         universe = GeneUniverse_entrezid)
## Warning in compareCluster(geneCluster = genes, fun = "enrichKEGG", organism =
## "hsa", : No enrichment found in any of gene cluster, please check your input...

There are no significant terms (They have Padj>0.05)

if (!is.null(compKEGG)) {
  dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
  }
compGO <- compareCluster(geneCluster   = genes,
                         fun           = "enrichGO",
                         keyType = "ENTREZID",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH", 
                         OrgDb='org.Hs.eg.db', 
                         universe=GeneUniverse_entrezid,
                         ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")

A new DMRs_annotated object is created in which annotation from ChIPseeker is added.

mcols(DMRs_annotated$hEGCLCsvsiMeLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)$ChipSeekerGeneId <- NA 

m <- findOverlaps(DMRs_annotated$hEGCLCsvsiMeLCs, peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "geneId"]

m <- findOverlaps(DMRs_annotated$hEGCLCsvsiMeLCs, peakAnnoList[["Higher methylation in iMeLCs"]]@anno)
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "geneId"]

Adding entrez gene id, gene symbol and gene information

DMRs_annotated$hEGCLCsvsiMeLCs <- as.data.frame(DMRs_annotated$hEGCLCsvsiMeLCs)
DMRs_annotated$hEGCLCsvsiMeLCs <- dplyr::left_join(DMRs_annotated$hEGCLCsvsiMeLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) 
DMRs_annotated$hEGCLCsvsiMeLCs[!is.na(DMRs_annotated$hEGCLCsvsiMeLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
    datatable(class = 'hover', rownames = FALSE, caption="DMRs - iMeLCsvshEGCLCs", extension='Buttons', escape = FALSE, 
              options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE, 
                             buttons=list(c('csv', 'excel'))))

5. Saving DMRs with annotation and Session Info

saveRDS(Genes_df, paste0(OutputFolder, '/', 'GeneUniverse.rds'))
saveRDS(DMRs_annotated, paste0(OutputFolder, '/', 'DMRsAnnotated_final.rds'))
SessionInfo <- sessionInfo()
Date <- date()
Date
## [1] "Thu Jan  9 19:24:24 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] data.table_1.14.8           DT_0.28                    
##  [3] annotatr_1.24.0             dmrseq_1.18.1              
##  [5] clusterProfiler_4.6.2       ReactomePA_1.42.0          
##  [7] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.2       
##  [9] ChIPseeker_1.34.1           gridExtra_2.3              
## [11] scales_1.2.1                plotly_4.10.2              
## [13] ggplot2_3.4.2               DSS_2.46.0                 
## [15] bsseq_1.34.0                SummarizedExperiment_1.28.0
## [17] MatrixGenerics_1.10.0       matrixStats_1.0.0          
## [19] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [21] IRanges_2.32.0              S4Vectors_0.36.2           
## [23] BiocParallel_1.32.6         Biobase_2.58.0             
## [25] BiocGenerics_0.44.0        
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3                             
##   [2] R.utils_2.12.2                         
##   [3] tidyselect_1.2.0                       
##   [4] RSQLite_2.3.1                          
##   [5] htmlwidgets_1.6.2                      
##   [6] grid_4.2.1                             
##   [7] scatterpie_0.1.8                       
##   [8] munsell_0.5.0                          
##   [9] codetools_0.2-19                       
##  [10] withr_2.5.0                            
##  [11] colorspace_2.1-0                       
##  [12] GOSemSim_2.24.0                        
##  [13] filelock_1.0.2                         
##  [14] highr_0.10                             
##  [15] knitr_1.43                             
##  [16] rstudioapi_0.15.0                      
##  [17] DOSE_3.24.2                            
##  [18] labeling_0.4.2                         
##  [19] GenomeInfoDbData_1.2.9                 
##  [20] polyclip_1.10-4                        
##  [21] bit64_4.0.5                            
##  [22] farver_2.1.1                           
##  [23] rhdf5_2.42.1                           
##  [24] downloader_0.4                         
##  [25] vctrs_0.6.3                            
##  [26] treeio_1.23.1                          
##  [27] generics_0.1.3                         
##  [28] gson_0.1.0                             
##  [29] xfun_0.39                              
##  [30] BiocFileCache_2.6.1                    
##  [31] regioneR_1.30.0                        
##  [32] R6_2.5.1                               
##  [33] graphlayouts_0.8.4                     
##  [34] locfit_1.5-9.7                         
##  [35] bitops_1.0-7                           
##  [36] rhdf5filters_1.10.1                    
##  [37] cachem_1.0.8                           
##  [38] fgsea_1.24.0                           
##  [39] gridGraphics_0.5-1                     
##  [40] DelayedArray_0.24.0                    
##  [41] promises_1.2.0.1                       
##  [42] BiocIO_1.8.0                           
##  [43] ggraph_2.1.0                           
##  [44] enrichplot_1.18.4                      
##  [45] gtable_0.3.3                           
##  [46] tidygraph_1.2.3                        
##  [47] rlang_1.1.1                            
##  [48] splines_4.2.1                          
##  [49] rtracklayer_1.58.0                     
##  [50] lazyeval_0.2.2                         
##  [51] BiocManager_1.30.20                    
##  [52] yaml_2.3.7                             
##  [53] reshape2_1.4.4                         
##  [54] crosstalk_1.2.0                        
##  [55] GenomicFeatures_1.50.4                 
##  [56] httpuv_1.6.11                          
##  [57] qvalue_2.30.0                          
##  [58] tools_4.2.1                            
##  [59] ggplotify_0.1.0                        
##  [60] ellipsis_0.3.2                         
##  [61] gplots_3.1.3                           
##  [62] jquerylib_0.1.4                        
##  [63] RColorBrewer_1.1-3                     
##  [64] Rcpp_1.0.11                            
##  [65] plyr_1.8.8                             
##  [66] sparseMatrixStats_1.10.0               
##  [67] progress_1.2.2                         
##  [68] zlibbioc_1.44.0                        
##  [69] purrr_1.0.1                            
##  [70] RCurl_1.98-1.12                        
##  [71] prettyunits_1.1.1                      
##  [72] viridis_0.6.2                          
##  [73] bumphunter_1.40.0                      
##  [74] cowplot_1.1.1                          
##  [75] ggrepel_0.9.3                          
##  [76] magrittr_2.0.3                         
##  [77] reactome.db_1.82.0                     
##  [78] xtable_1.8-4                           
##  [79] mime_0.12                              
##  [80] hms_1.1.3                              
##  [81] patchwork_1.1.2                        
##  [82] evaluate_0.21                          
##  [83] HDO.db_0.99.1                          
##  [84] XML_3.99-0.14                          
##  [85] compiler_4.2.1                         
##  [86] biomaRt_2.54.1                         
##  [87] tibble_3.2.1                           
##  [88] KernSmooth_2.23-22                     
##  [89] crayon_1.5.2                           
##  [90] shadowtext_0.1.2                       
##  [91] R.oo_1.25.0                            
##  [92] htmltools_0.5.5                        
##  [93] tzdb_0.4.0                             
##  [94] later_1.3.1                            
##  [95] ggfun_0.0.9                            
##  [96] tidyr_1.3.0                            
##  [97] aplot_0.1.10                           
##  [98] DBI_1.1.3                              
##  [99] tweenr_2.0.2                           
## [100] dbplyr_2.3.3                           
## [101] MASS_7.3-60                            
## [102] rappdirs_0.3.3                         
## [103] boot_1.3-28.1                          
## [104] readr_2.1.4                            
## [105] Matrix_1.6-0                           
## [106] permute_0.9-7                          
## [107] cli_3.6.1                              
## [108] R.methodsS3_1.8.2                      
## [109] igraph_1.5.0                           
## [110] pkgconfig_2.0.3                        
## [111] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [112] GenomicAlignments_1.34.1               
## [113] foreach_1.5.2                          
## [114] xml2_1.3.5                             
## [115] ggtree_3.6.2                           
## [116] bslib_0.5.0                            
## [117] rngtools_1.5.2                         
## [118] XVector_0.38.0                         
## [119] doRNG_1.8.6                            
## [120] yulab.utils_0.0.6                      
## [121] stringr_1.5.0                          
## [122] digest_0.6.33                          
## [123] graph_1.76.0                           
## [124] Biostrings_2.66.0                      
## [125] rmarkdown_2.23                         
## [126] fastmatch_1.1-3                        
## [127] tidytree_0.4.2                         
## [128] DelayedMatrixStats_1.20.0              
## [129] restfulr_0.0.15                        
## [130] curl_5.0.1                             
## [131] shiny_1.7.4.1                          
## [132] Rsamtools_2.14.0                       
## [133] gtools_3.9.4                           
## [134] graphite_1.44.0                        
## [135] rjson_0.2.21                           
## [136] outliers_0.15                          
## [137] lifecycle_1.0.3                        
## [138] nlme_3.1-162                           
## [139] jsonlite_1.8.7                         
## [140] Rhdf5lib_1.20.0                        
## [141] viridisLite_0.4.2                      
## [142] limma_3.54.2                           
## [143] BSgenome_1.66.3                        
## [144] fansi_1.0.4                            
## [145] pillar_1.9.0                           
## [146] lattice_0.21-8                         
## [147] KEGGREST_1.38.0                        
## [148] fastmap_1.1.1                          
## [149] httr_1.4.6                             
## [150] plotrix_3.8-2                          
## [151] GO.db_3.16.0                           
## [152] interactiveDisplayBase_1.36.0          
## [153] glue_1.6.2                             
## [154] iterators_1.0.14                       
## [155] png_0.1-8                              
## [156] BiocVersion_3.16.0                     
## [157] bit_4.0.5                              
## [158] ggforce_0.4.1                          
## [159] stringi_1.7.12                         
## [160] sass_0.4.7                             
## [161] HDF5Array_1.26.0                       
## [162] blob_1.2.4                             
## [163] AnnotationHub_3.6.0                    
## [164] caTools_1.18.2                         
## [165] memoise_2.0.1                          
## [166] dplyr_1.1.2                            
## [167] ape_5.7-1