library(DSS)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: BiocParallel
## Loading required package: bsseq
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: parallel
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(scales)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:bsseq':
##
## combine
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
library(ChIPseeker)
##
## ChIPseeker v1.34.1 For help: https://guangchuangyu.github.io/software/ChIPseeker
##
## If you use ChIPseeker in published research, please cite:
## Qianwen Wang, Ming Li, Tianzhi Wu, Li Zhan, Lin Li, Meijun Chen, Wenqin Xie, Zijing Xie, Erqiang Hu, Shuangbin Xu, Guangchuang Yu. Exploring epigenomic datasets by ChIPseeker. Current Protocols 2022, 2(10): e585
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:plotly':
##
## select
##
library(ReactomePA)
## ReactomePA v1.42.0 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479
library(clusterProfiler)
## clusterProfiler v4.6.2 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
library(dmrseq)
library(annotatr)
library(DT)
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
##
## shift
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
<- "~/DataDir/3.TwistBedAnn/Input/gencode.v35.annotation.gtf.gz"
GTFFile <- GenomicFeatures::makeTxDbFromGFF(GTFFile, format="gtf")
txdb_v35 ## Import genomic features from the file as a GRanges object ...
## OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...
## Warning in .get_cds_IDX(mcols0$type, mcols0$phase): The "phase" metadata column contains non-NA values for features of type
## stop_codon. This information was ignored.
## OK
<- params$InputFolder
InputFolder <- params$OutputFolder OutputFolder
Loading of bsseq object, with CpGs shared by 75% of samples (after sample selection).
<- readRDS("~/DataDir/2.DifferentialAnalysis/Input/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed bsseq_obj
The DMRs here selected are called like this:
delta=0.25
and p.threshold=0.0001
<- readRDS(paste0(InputFolder, '/', 'DMRs.rds'))
DMRs #DMLs <- readRDS(paste0(InputFolder, '/', 'DMLs.rds'))
Importing Twist regions that were annotated
<- readRDS(params$TwistAnnotated)
TwistAnnotated %>% head()
TwistAnnotated ## seqnames start end width strand
## 1 chr1 10466 10585 120 *
## 2 chr1 10790 10909 120 *
## 3 chr1 15806 15925 120 *
## 4 chr1 18768 18887 120 *
## 5 chr1 29357 29476 120 *
## 6 chr1 36544 36663 120 *
## ann
## 1 cg14817997,cpg_inter
## 2 cg16269199,cg26928153,cpg_inter
## 3 CTCF_binding_site,cg13869341,cpg_inter,promoter_flanking_region
## 4 cg14008030,cpg_inter
## 5 cg12045430,cg20826792,cpg_islands,promoter
## 6 cg18231760,cpg_inter,promoter_flanking_region
## annotation geneChr geneStart geneEnd geneLength geneStrand
## 1 Promoter (1-2kb) 1 11869 14409 2541 1
## 2 Promoter (<=1kb) 1 11869 14409 2541 1
## 3 Promoter (1-2kb) 1 17369 17436 68 2
## 4 Promoter (1-2kb) 1 17369 17436 68 2
## 5 Promoter (<=1kb) 1 29554 31097 1544 1
## 6 Promoter (<=1kb) 1 34554 36081 1528 2
## ensembl_gene_id_version transcriptId distanceToTSS
## 1 ENSG00000223972.5 ENST00000456328.2 -1284
## 2 ENSG00000223972.5 ENST00000456328.2 -960
## 3 ENSG00000278267.1 ENST00000619216.1 1511
## 4 ENSG00000278267.1 ENST00000619216.1 -1332
## 5 ENSG00000243485.5 ENST00000473358.1 -78
## 6 ENSG00000237613.2 ENST00000417324.1 -463
## flank_txIds
## 1 ENST00000456328.2;ENST00000450305.2;ENST00000488147.1
## 2 ENST00000456328.2;ENST00000450305.2;ENST00000488147.1
## 3 ENST00000456328.2;ENST00000450305.2;ENST00000488147.1;ENST00000619216.1
## 4 ENST00000456328.2;ENST00000488147.1;ENST00000619216.1
## 5 ENST00000488147.1;ENST00000473358.1;ENST00000469289.1;ENST00000607096.1
## 6 ENST00000417324.1;ENST00000461467.1
## flank_geneIds
## 1 ENSG00000223972.5;ENSG00000223972.5;ENSG00000227232.5
## 2 ENSG00000223972.5;ENSG00000223972.5;ENSG00000227232.5
## 3 ENSG00000223972.5;ENSG00000223972.5;ENSG00000227232.5;ENSG00000278267.1
## 4 ENSG00000223972.5;ENSG00000227232.5;ENSG00000278267.1
## 5 ENSG00000227232.5;ENSG00000243485.5;ENSG00000243485.5;ENSG00000284332.1
## 6 ENSG00000237613.2;ENSG00000237613.2
## flank_gene_distances ensembl_gene_id hgnc_symbol external_gene_name
## 1 -1284;-1425;0 ENSG00000223972 DDX11L1 DDX11L1
## 2 -960;-1101;0 ENSG00000223972 DDX11L1 DDX11L1
## 3 3937;3796;0;1511 ENSG00000278267 MIR6859-1 MIR6859-1
## 4 6899;0;-1332 ENSG00000278267 MIR6859-1 MIR6859-1
## 5 0;-78;-791;-890 ENSG00000243485 MIR1302-2HG MIR1302-2HG
## 6 -463;-471 ENSG00000237613 FAM138A FAM138A
## gene_biotype
## 1 transcribed_unprocessed_pseudogene
## 2 transcribed_unprocessed_pseudogene
## 3 miRNA
## 4 miRNA
## 5 lncRNA
## 6 lncRNA
## description
## 1 DEAD/H-box helicase 11 like 1 (pseudogene) [Source:HGNC Symbol;Acc:HGNC:37102]
## 2 DEAD/H-box helicase 11 like 1 (pseudogene) [Source:HGNC Symbol;Acc:HGNC:37102]
## 3 microRNA 6859-1 [Source:HGNC Symbol;Acc:HGNC:50039]
## 4 microRNA 6859-1 [Source:HGNC Symbol;Acc:HGNC:50039]
## 5 MIR1302-2 host gene [Source:HGNC Symbol;Acc:HGNC:52482]
## 6 family with sequence similarity 138 member A [Source:HGNC Symbol;Acc:HGNC:32334]
## chromosome_name start_position end_position
## 1 1 11869 14409
## 2 1 11869 14409
## 3 1 17369 17436
## 4 1 17369 17436
## 5 1 29554 31109
## 6 1 34554 36081
<- TwistAnnotated[, c("ensembl_gene_id_version", "ensembl_gene_id", "hgnc_symbol", "gene_biotype")] %>% dplyr::distinct()%>%filter(!is.na(hgnc_symbol))%>%filter(!hgnc_symbol%in%"")
Genes_df rownames(Genes_df) <- NULL
These are the genes with the same symbol but different ensembl id
$hgnc_symbol%in%Genes_df[duplicated(Genes_df$hgnc_symbol), "hgnc_symbol"],]
Genes_df[Genes_df## ensembl_gene_id_version ensembl_gene_id hgnc_symbol
## 3309 ENSG00000285053.1 ENSG00000285053 TBCE
## 3310 ENSG00000284770.2 ENSG00000284770 TBCE
## 20449 ENSG00000237940.3 ENSG00000237940 LINC01238
## 20451 ENSG00000261186.2 ENSG00000261186 LINC01238
## 21924 ENSG00000206195.11 ENSG00000206195 DUXAP8
## 21926 ENSG00000271672.1 ENSG00000271672 DUXAP8
## 24581 ENSG00000284862.3 ENSG00000284862 CCDC39
## 24582 ENSG00000145075.13 ENSG00000145075 CCDC39
## 30308 ENSG00000272655.2 ENSG00000272655 POLR2J4
## 30309 ENSG00000214783.9 ENSG00000214783 POLR2J4
## gene_biotype
## 3309 protein_coding
## 3310 protein_coding
## 20449 lncRNA
## 20451 lncRNA
## 21924 lncRNA
## 21926 transcribed_processed_pseudogene
## 24581 protein_coding
## 24582 lncRNA
## 30308 transcribed_unprocessed_pseudogene
## 30309 lncRNA
$entrez_gene_id <- as.vector(mapIds(org.Hs.eg.db, Genes_df$hgnc_symbol, "ENTREZID","SYMBOL"))
Genes_df## 'select()' returned 1:many mapping between keys and columns
nrow(Genes_df[is.na(Genes_df$entrez_gene_id),]) #number of genes without an entrez id
## [1] 421
<- unique(Genes_df$hgnc_symbol)
GeneUniverse_symbol <- na.omit(unique(Genes_df$entrez_gene_id)) GeneUniverse_entrezid
Remember that position form bedGraph files are 0-based while R works
with 1-based position and therefore also the packages we will go to use,
this is the reason why I set starts.in.df.are.0based = TRUE
in creating the GRanges objects, it converts 0-based to 1-based
coordinates.
<- DMRs
DMRs_GRanges #DMLs_GRanges <- DMLs
for(i in 1:length(DMRs)){
<- makeGRangesFromDataFrame(DMRs_GRanges[[i]], keep.extra.columns = TRUE,
DMRs_GRanges[[i]] start.field = "start", end.field = "end", starts.in.df.are.0based = TRUE)
#DMLs_GRanges[[i]] <- makeGRangesFromDataFrame(DMLs_GRanges[[i]], keep.extra.columns = TRUE,
#start.field = "start", end.field = "end", starts.in.df.are.0based = TRUE)
}
ChIPseeker
DMRs are now annotated to their associated genes and genomic regions
using the annotatePeaks
function of the
ChIPseeker
R package, using a TxDb object. The GenomicFeatures
package uses TxDb
objects to store transcript metadata.
This class maps the 5’ and 3’ untranslated regions (UTRs), protein
coding sequences (CDSs) and exons for a set of mRNA transcripts to their
associated genome. All TxDb
objects are backed by a SQLite
database that manages genomic locations and the relationships between
pre-processed mRNA transcripts, exons, protein coding sequences, and
their related gene identifiers.
TxDb created by us
<- lapply(list("Higher methylation in hiPSCs" = DMRs_GRanges$hEGCLCsvshiPSCs[DMRs_GRanges$hEGCLCsvshiPSCs$diff.Methy<0,], "Higher methylation in hEGCLCs" = DMRs_GRanges$hEGCLCsvshiPSCs[DMRs_GRanges$hEGCLCsvshiPSCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE) peakAnnoList
::plotAnnoBar(peakAnnoList) ChIPseeker
::plotDistToTSS(peakAnnoList) ChIPseeker
Given a list of gene set, compareCluster
function will
compute profiles of each gene cluster.
= lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
genes names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hiPSCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in hEGCLCs`))
## [1] FALSE
There are no duplicated ensembl ids
table(genes$`Higher methylation in hEGCLCs` %in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 4 11
$`Higher methylation in hEGCLCs` [!genes$`Higher methylation in hEGCLCs` %in% Genes_df$ensembl_gene_id_version]
genes## [1] "ENSG00000287690.1" "ENSG00000286525.1" "ENSG00000231078.1"
## [4] "ENSG00000205622.11"
table(genes$`Higher methylation in hiPSCs`%in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 155 530
<- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hiPSCs`, "entrez_gene_id"]
DMG_uphiPSCsvshEGCLCs <- na.omit(DMG_uphiPSCsvshEGCLCs[!DMG_uphiPSCsvshEGCLCs %in% ""])
DMG_uphiPSCsvshEGCLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hEGCLCs`, "entrez_gene_id"]
DMG_downhiPSCsvshEGCLCs <- na.omit(DMG_downhiPSCsvshEGCLCs[!DMG_downhiPSCsvshEGCLCs %in% ""]) DMG_downhiPSCsvshEGCLCs
any(duplicated(DMG_uphiPSCsvshEGCLCs))
## [1] FALSE
any(duplicated(DMG_downhiPSCsvshEGCLCs))
## [1] FALSE
$`Higher methylation in hiPSCs` <- DMG_uphiPSCsvshEGCLCs
genes$`Higher methylation in hEGCLCs` <- DMG_downhiPSCsvshEGCLCs genes
<- compareCluster(geneCluster = genes,
compKEGG fun = "enrichKEGG",
organism="hsa",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
universe = GeneUniverse_entrezid)
## Warning in compareCluster(geneCluster = genes, fun = "enrichKEGG", organism =
## "hsa", : No enrichment found in any of gene cluster, please check your input...
There are no significant terms (They have Padj>0.05)
#dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
<- compareCluster(geneCluster = genes,
compGO fun = "enrichGO",
keyType = "ENTREZID",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
OrgDb='org.Hs.eg.db',
universe=GeneUniverse_entrezid,
ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")
A new DMRs_annotated
object is created in which
annotation from ChIPseeker
is added.
<- DMRs_GRanges[-2]
DMRs_annotated
mcols(DMRs_annotated$hEGCLCsvshiPSCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hEGCLCsvshiPSCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hEGCLCsvshiPSCs)$ChipSeekerGeneId <- NA
<- findOverlaps(DMRs_annotated$hEGCLCsvshiPSCs, peakAnnoList[["Higher methylation in hiPSCs"]]@anno)
m mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "geneId"]
<- findOverlaps(DMRs_annotated$hEGCLCsvshiPSCs, peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)
m mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvshiPSCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "geneId"]
Adding entrez gene id, gene symbol and gene information
$hEGCLCsvshiPSCs <- as.data.frame(DMRs_annotated$hEGCLCsvshiPSCs)
DMRs_annotated$hEGCLCsvshiPSCs <- dplyr::left_join(DMRs_annotated$hEGCLCsvshiPSCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) DMRs_annotated
$hEGCLCsvshiPSCs[!is.na(DMRs_annotated$hEGCLCsvshiPSCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
DMRs_annotateddatatable(class = 'hover', rownames = FALSE, caption="DMRs - hEGCLCsvshiPSCs", extension='Buttons', escape = FALSE,
options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE,
buttons=list(c('csv', 'excel'))))
ChIPseeker
DMRs are now annotated to their associated genes and genomic regions
using the annotatePeaks
function of the
ChIPseeker
R package, using a TxDb object. The GenomicFeatures
package uses TxDb
objects to store transcript metadata.
This class maps the 5’ and 3’ untranslated regions (UTRs), protein
coding sequences (CDSs) and exons for a set of mRNA transcripts to their
associated genome. All TxDb
objects are backed by a SQLite
database that manages genomic locations and the relationships between
pre-processed mRNA transcripts, exons, protein coding sequences, and
their related gene identifiers.
TxDb created by us
<- lapply(list("Higher methylation in hPGCLCs" = DMRs_GRanges$hEGCLCsvshPGCLCs[DMRs_GRanges$hEGCLCsvshPGCLCs$diff.Methy<0,], "Higher methylation in hEGCLCs" = DMRs_GRanges$hEGCLCsvshPGCLCs[DMRs_GRanges$hEGCLCsvshPGCLCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE) peakAnnoList
::plotAnnoBar(peakAnnoList) ChIPseeker
::plotDistToTSS(peakAnnoList) ChIPseeker
Given a list of gene set, compareCluster
function will
compute profiles of each gene cluster.
= lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
genes names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hPGCLCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in hEGCLCs`))
## [1] FALSE
There are no duplicated ensembl ids
table(genes$`Higher methylation in hEGCLCs` %in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 389 1310
$`Higher methylation in hEGCLCs` [!genes$`Higher methylation in hEGCLCs` %in% Genes_df$ensembl_gene_id_version]
genes## [1] "ENSG00000272849.1" "ENSG00000224079.1" "ENSG00000225339.4"
## [4] "ENSG00000275178.1" "ENSG00000288549.1" "ENSG00000275088.1"
## [7] "ENSG00000286295.1" "ENSG00000279135.1" "ENSG00000280326.1"
## [10] "ENSG00000267665.2" "ENSG00000237326.1" "ENSG00000287403.1"
## [13] "ENSG00000233052.1" "ENSG00000286535.1" "ENSG00000284691.1"
## [16] "ENSG00000263017.1" "ENSG00000285855.1" "ENSG00000285163.1"
## [19] "ENSG00000287428.1" "ENSG00000230676.1" "ENSG00000234275.4"
## [22] "ENSG00000287690.1" "ENSG00000259362.2" "ENSG00000287407.1"
## [25] "ENSG00000267244.5" "ENSG00000279412.1" "ENSG00000237126.8"
## [28] "ENSG00000278716.1" "ENSG00000238010.1" "ENSG00000230526.1"
## [31] "ENSG00000274373.1" "ENSG00000276166.1" "ENSG00000255342.1"
## [34] "ENSG00000249808.3" "ENSG00000238031.1" "ENSG00000287787.1"
## [37] "ENSG00000253397.1" "ENSG00000241449.6" "ENSG00000279065.1"
## [40] "ENSG00000256564.1" "ENSG00000227479.1" "ENSG00000270490.1"
## [43] "ENSG00000282718.1" "ENSG00000233891.8" "ENSG00000215381.3"
## [46] "ENSG00000272988.1" "ENSG00000199949.2" "ENSG00000231113.2"
## [49] "ENSG00000237371.1" "ENSG00000253543.1" "ENSG00000257883.1"
## [52] "ENSG00000270911.1" "ENSG00000238232.1" "ENSG00000254607.2"
## [55] "ENSG00000287823.1" "ENSG00000262884.1" "ENSG00000258698.1"
## [58] "ENSG00000270972.1" "ENSG00000286240.1" "ENSG00000284966.3"
## [61] "ENSG00000235811.1" "ENSG00000279931.1" "ENSG00000233683.1"
## [64] "ENSG00000237720.2" "ENSG00000245688.1" "ENSG00000234793.1"
## [67] "ENSG00000280444.1" "ENSG00000231482.3" "ENSG00000267090.1"
## [70] "ENSG00000228154.3" "ENSG00000286525.1" "ENSG00000266933.2"
## [73] "ENSG00000287046.1" "ENSG00000282952.1" "ENSG00000260855.1"
## [76] "ENSG00000216775.3" "ENSG00000279711.1" "ENSG00000230990.2"
## [79] "ENSG00000261054.1" "ENSG00000279705.1" "ENSG00000286286.1"
## [82] "ENSG00000262652.1" "ENSG00000287073.1" "ENSG00000268750.7"
## [85] "ENSG00000261938.1" "ENSG00000285773.1" "ENSG00000255845.1"
## [88] "ENSG00000250577.1" "ENSG00000287161.1" "ENSG00000261189.1"
## [91] "ENSG00000205325.3" "ENSG00000231170.7" "ENSG00000254755.1"
## [94] "ENSG00000254933.1" "ENSG00000286502.1" "ENSG00000226047.1"
## [97] "ENSG00000225092.2" "ENSG00000261606.5" "ENSG00000269667.2"
## [100] "ENSG00000261976.2" "ENSG00000234428.2" "ENSG00000287906.1"
## [103] "ENSG00000259341.2" "ENSG00000279076.1" "ENSG00000287338.1"
## [106] "ENSG00000269976.1" "ENSG00000231078.1" "ENSG00000232408.1"
## [109] "ENSG00000235994.4" "ENSG00000226169.1" "ENSG00000278330.1"
## [112] "ENSG00000261177.1" "ENSG00000253140.2" "ENSG00000219133.2"
## [115] "ENSG00000256955.2" "ENSG00000237372.4" "ENSG00000254111.3"
## [118] "ENSG00000250409.1" "ENSG00000271709.1" "ENSG00000275155.1"
## [121] "ENSG00000226868.1" "ENSG00000229628.1" "ENSG00000234949.2"
## [124] "ENSG00000250230.3" "ENSG00000234378.1" "ENSG00000261211.1"
## [127] "ENSG00000224545.1" "ENSG00000203647.2" "ENSG00000260473.2"
## [130] "ENSG00000249236.2" "ENSG00000286805.1" "ENSG00000275741.1"
## [133] "ENSG00000242009.2" "ENSG00000261534.1" "ENSG00000279841.1"
## [136] "ENSG00000231876.8" "ENSG00000256481.1" "ENSG00000253445.1"
## [139] "ENSG00000287680.1" "ENSG00000225156.2" "ENSG00000287235.1"
## [142] "ENSG00000205622.11" "ENSG00000287089.1" "ENSG00000201196.1"
## [145] "ENSG00000231539.1" "ENSG00000279261.2" "ENSG00000225028.1"
## [148] "ENSG00000236676.1" "ENSG00000250338.1" "ENSG00000260185.1"
## [151] "ENSG00000224195.1" "ENSG00000225140.1" "ENSG00000253307.1"
## [154] "ENSG00000254364.1" "ENSG00000259252.1" "ENSG00000227527.2"
## [157] "ENSG00000270531.1" "ENSG00000228361.2" "ENSG00000287756.1"
## [160] "ENSG00000231073.1" "ENSG00000288040.1" "ENSG00000286198.1"
## [163] "ENSG00000267658.1" "ENSG00000201549.1" "ENSG00000254205.1"
## [166] "ENSG00000287828.1" "ENSG00000259920.1" "ENSG00000243491.1"
## [169] "ENSG00000280110.1" "ENSG00000258560.1" "ENSG00000254777.5"
## [172] "ENSG00000285707.1" "ENSG00000230163.1" "ENSG00000280354.1"
## [175] "ENSG00000248359.2" "ENSG00000234640.1" "ENSG00000225187.1"
## [178] "ENSG00000256314.1" "ENSG00000279430.1" "ENSG00000235563.1"
## [181] "ENSG00000225632.1" "ENSG00000264914.1" "ENSG00000262869.1"
## [184] "ENSG00000283782.2" "ENSG00000235538.3" "ENSG00000242531.1"
## [187] "ENSG00000279526.1" "ENSG00000261555.1" "ENSG00000254789.3"
## [190] "ENSG00000201042.1" "ENSG00000246790.2" "ENSG00000253688.2"
## [193] "ENSG00000264093.1" "ENSG00000232409.1" "ENSG00000287387.1"
## [196] "ENSG00000250007.7" "ENSG00000248645.1" "ENSG00000286320.1"
## [199] "ENSG00000273160.1" "ENSG00000258935.1" "ENSG00000234174.1"
## [202] "ENSG00000196696.13" "ENSG00000254559.1" "ENSG00000287822.1"
## [205] "ENSG00000251270.1" "ENSG00000237617.1" "ENSG00000253125.1"
## [208] "ENSG00000262445.3" "ENSG00000259420.5" "ENSG00000226087.2"
## [211] "ENSG00000229673.1" "ENSG00000236936.1" "ENSG00000225315.2"
## [214] "ENSG00000286641.1" "ENSG00000212589.1" "ENSG00000279982.1"
## [217] "ENSG00000226706.1" "ENSG00000280270.1" "ENSG00000286356.1"
## [220] "ENSG00000217514.1" "ENSG00000272097.2" "ENSG00000250576.1"
## [223] "ENSG00000259579.1" "ENSG00000279409.1" "ENSG00000232716.2"
## [226] "ENSG00000261702.2" "ENSG00000277825.1" "ENSG00000260004.1"
## [229] "ENSG00000267245.1" "ENSG00000235023.1" "ENSG00000254290.1"
## [232] "ENSG00000286610.1" "ENSG00000226927.1" "ENSG00000286635.1"
## [235] "ENSG00000248521.1" "ENSG00000288079.1" "ENSG00000260640.1"
## [238] "ENSG00000277319.1" "ENSG00000224731.1" "ENSG00000260454.2"
## [241] "ENSG00000286505.1" "ENSG00000261095.1" "ENSG00000286364.1"
## [244] "ENSG00000262966.2" "ENSG00000261472.1" "ENSG00000228683.1"
## [247] "ENSG00000276476.3" "ENSG00000274379.1" "ENSG00000235096.2"
## [250] "ENSG00000226398.1" "ENSG00000201541.1" "ENSG00000288639.1"
## [253] "ENSG00000280392.1" "ENSG00000249727.2" "ENSG00000226965.2"
## [256] "ENSG00000271114.1" "ENSG00000251445.1" "ENSG00000229305.1"
## [259] "ENSG00000201584.1" "ENSG00000203392.3" "ENSG00000280004.1"
## [262] "ENSG00000287975.1" "ENSG00000283128.2" "ENSG00000228613.1"
## [265] "ENSG00000258346.1" "ENSG00000248962.1" "ENSG00000224794.2"
## [268] "ENSG00000287903.1" "ENSG00000261723.1" "ENSG00000285367.1"
## [271] "ENSG00000244260.2" "ENSG00000287560.1" "ENSG00000233096.1"
## [274] "ENSG00000231251.1" "ENSG00000286390.1" "ENSG00000256452.1"
## [277] "ENSG00000272922.1" "ENSG00000263508.6" "ENSG00000280217.1"
## [280] "ENSG00000226963.1" "ENSG00000280138.1" "ENSG00000249930.1"
## [283] "ENSG00000202318.1" "ENSG00000262810.1" "ENSG00000261710.1"
## [286] "ENSG00000283511.1" "ENSG00000237422.2" "ENSG00000268080.2"
## [289] "ENSG00000235886.1" "ENSG00000288615.1" "ENSG00000233633.2"
## [292] "ENSG00000229618.3" "ENSG00000272215.1" "ENSG00000280222.1"
## [295] "ENSG00000263723.1" "ENSG00000259202.1" "ENSG00000220739.2"
## [298] "ENSG00000272715.1" "ENSG00000287145.1" "ENSG00000229660.1"
## [301] "ENSG00000285534.1" "ENSG00000228345.2" "ENSG00000279778.1"
## [304] "ENSG00000227920.3" "ENSG00000253051.1" "ENSG00000263499.1"
## [307] "ENSG00000287726.1" "ENSG00000277831.1" "ENSG00000280047.1"
## [310] "ENSG00000226096.1" "ENSG00000221461.2" "ENSG00000258760.1"
## [313] "ENSG00000258088.1" "ENSG00000259200.2" "ENSG00000248702.1"
## [316] "ENSG00000206814.1" "ENSG00000284652.1" "ENSG00000260788.6"
## [319] "ENSG00000286757.2" "ENSG00000255446.1" "ENSG00000258399.7"
## [322] "ENSG00000186244.7" "ENSG00000212580.1" "ENSG00000255021.1"
## [325] "ENSG00000176515.1" "ENSG00000233981.1" "ENSG00000237920.2"
## [328] "ENSG00000270927.1" "ENSG00000260304.1" "ENSG00000287882.1"
## [331] "ENSG00000232072.1" "ENSG00000244662.1" "ENSG00000285775.1"
## [334] "ENSG00000273497.1" "ENSG00000227775.3" "ENSG00000234017.1"
## [337] "ENSG00000259989.1" "ENSG00000229607.1" "ENSG00000267579.1"
## [340] "ENSG00000254186.3" "ENSG00000285623.1" "ENSG00000254458.1"
## [343] "ENSG00000203739.4" "ENSG00000271384.1" "ENSG00000286276.1"
## [346] "ENSG00000266101.1" "ENSG00000271795.1" "ENSG00000199196.1"
## [349] "ENSG00000277938.1" "ENSG00000207210.1" "ENSG00000268926.3"
## [352] "ENSG00000286101.1" "ENSG00000270174.1" "ENSG00000255605.1"
## [355] "ENSG00000257726.1" "ENSG00000254538.1" "ENSG00000278999.1"
## [358] "ENSG00000230404.1" "ENSG00000212590.1" "ENSG00000242757.1"
## [361] "ENSG00000256708.1" "ENSG00000248733.1" "ENSG00000271269.1"
## [364] "ENSG00000235152.1" "ENSG00000286683.1" "ENSG00000236466.2"
## [367] "ENSG00000199751.1" "ENSG00000241354.2" "ENSG00000224593.1"
## [370] "ENSG00000223027.1" "ENSG00000199200.2" "ENSG00000276332.1"
## [373] "ENSG00000277050.1" "ENSG00000255545.8" "ENSG00000197099.8"
## [376] "ENSG00000270120.1" "ENSG00000237844.2" "ENSG00000203446.2"
## [379] "ENSG00000287010.1" "ENSG00000243116.1" "ENSG00000228176.1"
## [382] "ENSG00000201393.1" "ENSG00000286176.2" "ENSG00000231057.4"
## [385] "ENSG00000283579.1" "ENSG00000259771.1" "ENSG00000223727.6"
## [388] "ENSG00000258111.1" "ENSG00000271475.1"
table(genes$`Higher methylation in hPGCLCs`%in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 162 591
<- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hPGCLCs`, "entrez_gene_id"]
DMG_uphPGCLCsvshEGCLCs <- na.omit(DMG_uphPGCLCsvshEGCLCs[!DMG_uphPGCLCsvshEGCLCs %in% ""])
DMG_uphPGCLCsvshEGCLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hEGCLCs`, "entrez_gene_id"]
DMG_downhPGCLCsvshEGCLCs <- na.omit(DMG_downhPGCLCsvshEGCLCs[!DMG_downhPGCLCsvshEGCLCs %in% ""]) DMG_downhPGCLCsvshEGCLCs
any(duplicated(DMG_uphPGCLCsvshEGCLCs))
## [1] FALSE
any(duplicated(DMG_downhPGCLCsvshEGCLCs))
## [1] FALSE
$`Higher methylation in hPGCLCs` <- DMG_uphPGCLCsvshEGCLCs
genes$`Higher methylation in hEGCLCs` <- DMG_downhPGCLCsvshEGCLCs genes
<- compareCluster(geneCluster = genes,
compKEGG fun = "enrichKEGG",
organism="hsa",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
universe = GeneUniverse_entrezid)
There are no significant terms (They have Padj>0.05)
dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
<- compareCluster(geneCluster = genes,
compGO fun = "enrichGO",
keyType = "ENTREZID",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
OrgDb='org.Hs.eg.db',
universe=GeneUniverse_entrezid,
ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")
A new DMRs_annotated
object is created in which
annotation from ChIPseeker
is added.
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)$ChipSeekerGeneId <- NA
<- findOverlaps(DMRs_annotated$hEGCLCsvshPGCLCs, peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)
m mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "geneId"]
<- findOverlaps(DMRs_annotated$hEGCLCsvshPGCLCs, peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)
m mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "geneId"]
Adding entrez gene id, gene symbol and gene information
$hEGCLCsvshPGCLCs <- as.data.frame(DMRs_annotated$hEGCLCsvshPGCLCs)
DMRs_annotated$hEGCLCsvshPGCLCs <- dplyr::left_join(DMRs_annotated$hEGCLCsvshPGCLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) DMRs_annotated
$hEGCLCsvshPGCLCs[!is.na(DMRs_annotated$hEGCLCsvshPGCLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
DMRs_annotateddatatable(class = 'hover', rownames = FALSE, caption="DMRs - hEGCLCsvshPGCLCs", extension='Buttons', escape = FALSE,
options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE,
buttons=list(c('csv', 'excel'))))
ChIPseeker
DMRs are now annotated to their associated genes and genomic regions
using the annotatePeaks
function of the
ChIPseeker
R package, using a TxDb object. The GenomicFeatures
package uses TxDb
objects to store transcript metadata.
This class maps the 5’ and 3’ untranslated regions (UTRs), protein
coding sequences (CDSs) and exons for a set of mRNA transcripts to their
associated genome. All TxDb
objects are backed by a SQLite
database that manages genomic locations and the relationships between
pre-processed mRNA transcripts, exons, protein coding sequences, and
their related gene identifiers.
TxDb created by us
<- lapply(list("Higher methylation in hPGCLCs" = DMRs_GRanges$hiPSCsvshPGCLCs[DMRs_GRanges$hiPSCsvshPGCLCs$diff.Methy<0,], "Higher methylation in hiPSCs" = DMRs_GRanges$hiPSCsvshPGCLCs[DMRs_GRanges$hiPSCsvshPGCLCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE) peakAnnoList
::plotAnnoBar(peakAnnoList) ChIPseeker
::plotDistToTSS(peakAnnoList) ChIPseeker
Given a list of gene set, compareCluster
function will
compute profiles of each gene cluster.
= lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
genes names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hPGCLCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in hiPSCs`))
## [1] FALSE
There are no duplicated ensembl ids
table(genes$`Higher methylation in hiPSCs` %in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 465 1600
$`Higher methylation in hiPSCs` [!genes$`Higher methylation in hiPSCs` %in% Genes_df$ensembl_gene_id_version]
genes## [1] "ENSG00000272849.1" "ENSG00000224079.1" "ENSG00000225339.4"
## [4] "ENSG00000275178.1" "ENSG00000288549.1" "ENSG00000275088.1"
## [7] "ENSG00000278716.1" "ENSG00000253395.1" "ENSG00000267665.2"
## [10] "ENSG00000286295.1" "ENSG00000233052.1" "ENSG00000230676.1"
## [13] "ENSG00000280326.1" "ENSG00000237326.1" "ENSG00000263017.1"
## [16] "ENSG00000286535.1" "ENSG00000267036.2" "ENSG00000237371.1"
## [19] "ENSG00000285163.1" "ENSG00000284691.1" "ENSG00000285855.1"
## [22] "ENSG00000282718.1" "ENSG00000279135.1" "ENSG00000234275.4"
## [25] "ENSG00000287428.1" "ENSG00000287407.1" "ENSG00000259362.2"
## [28] "ENSG00000287823.1" "ENSG00000253102.1" "ENSG00000237126.8"
## [31] "ENSG00000254755.1" "ENSG00000279412.1" "ENSG00000276166.1"
## [34] "ENSG00000259341.2" "ENSG00000238031.1" "ENSG00000279065.1"
## [37] "ENSG00000272988.1" "ENSG00000238010.1" "ENSG00000270490.1"
## [40] "ENSG00000230526.1" "ENSG00000241449.6" "ENSG00000234793.1"
## [43] "ENSG00000268189.2" "ENSG00000233891.8" "ENSG00000205325.3"
## [46] "ENSG00000255845.1" "ENSG00000249808.3" "ENSG00000262652.1"
## [49] "ENSG00000199949.2" "ENSG00000253397.1" "ENSG00000231113.2"
## [52] "ENSG00000256564.1" "ENSG00000258717.1" "ENSG00000237372.4"
## [55] "ENSG00000176515.1" "ENSG00000215381.3" "ENSG00000270911.1"
## [58] "ENSG00000245688.1" "ENSG00000279931.1" "ENSG00000253543.1"
## [61] "ENSG00000203739.4" "ENSG00000226409.3" "ENSG00000286364.1"
## [64] "ENSG00000231482.3" "ENSG00000228613.1" "ENSG00000242757.1"
## [67] "ENSG00000262884.1" "ENSG00000279711.1" "ENSG00000258698.1"
## [70] "ENSG00000257883.1" "ENSG00000279774.1" "ENSG00000268750.7"
## [73] "ENSG00000261054.1" "ENSG00000238232.1" "ENSG00000286240.1"
## [76] "ENSG00000275741.1" "ENSG00000284966.3" "ENSG00000280444.1"
## [79] "ENSG00000287338.1" "ENSG00000250577.1" "ENSG00000267090.1"
## [82] "ENSG00000287161.1" "ENSG00000287787.1" "ENSG00000285773.1"
## [85] "ENSG00000228830.1" "ENSG00000261976.2" "ENSG00000235811.1"
## [88] "ENSG00000286286.1" "ENSG00000216775.3" "ENSG00000229660.1"
## [91] "ENSG00000256314.1" "ENSG00000228154.3" "ENSG00000226047.1"
## [94] "ENSG00000286610.1" "ENSG00000261938.1" "ENSG00000279705.1"
## [97] "ENSG00000286333.1" "ENSG00000287046.1" "ENSG00000201026.1"
## [100] "ENSG00000230990.2" "ENSG00000205037.2" "ENSG00000253445.1"
## [103] "ENSG00000234428.2" "ENSG00000280004.1" "ENSG00000269667.2"
## [106] "ENSG00000269976.1" "ENSG00000270124.1" "ENSG00000249236.2"
## [109] "ENSG00000259961.1" "ENSG00000286805.1" "ENSG00000248962.1"
## [112] "ENSG00000287073.1" "ENSG00000274373.1" "ENSG00000235994.4"
## [115] "ENSG00000254607.2" "ENSG00000230163.1" "ENSG00000282952.1"
## [118] "ENSG00000284666.1" "ENSG00000231539.1" "ENSG00000267364.2"
## [121] "ENSG00000278330.1" "ENSG00000253307.1" "ENSG00000231876.8"
## [124] "ENSG00000226868.1" "ENSG00000287632.1" "ENSG00000242009.2"
## [127] "ENSG00000231170.7" "ENSG00000279076.1" "ENSG00000206814.1"
## [130] "ENSG00000282097.1" "ENSG00000253140.2" "ENSG00000229628.1"
## [133] "ENSG00000226169.1" "ENSG00000279668.3" "ENSG00000250338.1"
## [136] "ENSG00000250409.1" "ENSG00000275155.1" "ENSG00000279841.1"
## [139] "ENSG00000232408.1" "ENSG00000254027.1" "ENSG00000256955.2"
## [142] "ENSG00000225028.1" "ENSG00000273160.1" "ENSG00000287089.1"
## [145] "ENSG00000254933.1" "ENSG00000287235.1" "ENSG00000250230.3"
## [148] "ENSG00000260473.2" "ENSG00000226542.1" "ENSG00000234949.2"
## [151] "ENSG00000224545.1" "ENSG00000236676.1" "ENSG00000256481.1"
## [154] "ENSG00000254364.1" "ENSG00000201196.1" "ENSG00000196696.13"
## [157] "ENSG00000225140.1" "ENSG00000279261.2" "ENSG00000203647.2"
## [160] "ENSG00000287811.1" "ENSG00000217455.8" "ENSG00000287756.1"
## [163] "ENSG00000231073.1" "ENSG00000225092.2" "ENSG00000219133.2"
## [166] "ENSG00000254111.3" "ENSG00000259920.1" "ENSG00000261606.5"
## [169] "ENSG00000254777.5" "ENSG00000287680.1" "ENSG00000235538.3"
## [172] "ENSG00000279430.1" "ENSG00000286198.1" "ENSG00000254205.1"
## [175] "ENSG00000230258.6" "ENSG00000259252.1" "ENSG00000287457.1"
## [178] "ENSG00000250576.1" "ENSG00000288040.1" "ENSG00000254290.1"
## [181] "ENSG00000226087.2" "ENSG00000286635.1" "ENSG00000278973.1"
## [184] "ENSG00000233981.1" "ENSG00000262869.1" "ENSG00000201042.1"
## [187] "ENSG00000227527.2" "ENSG00000228361.2" "ENSG00000254559.1"
## [190] "ENSG00000286502.1" "ENSG00000284705.1" "ENSG00000201549.1"
## [193] "ENSG00000243491.1" "ENSG00000237720.2" "ENSG00000236800.1"
## [196] "ENSG00000224195.1" "ENSG00000283782.2" "ENSG00000237987.1"
## [199] "ENSG00000286607.1" "ENSG00000237617.1" "ENSG00000276476.3"
## [202] "ENSG00000260185.1" "ENSG00000248359.2" "ENSG00000225315.2"
## [205] "ENSG00000287162.1" "ENSG00000234378.1" "ENSG00000288605.1"
## [208] "ENSG00000287828.1" "ENSG00000286498.1" "ENSG00000279526.1"
## [211] "ENSG00000254789.3" "ENSG00000286471.1" "ENSG00000285707.1"
## [214] "ENSG00000232716.2" "ENSG00000279087.1" "ENSG00000280270.1"
## [217] "ENSG00000287822.1" "ENSG00000258560.1" "ENSG00000286641.1"
## [220] "ENSG00000262445.3" "ENSG00000225156.2" "ENSG00000250007.7"
## [223] "ENSG00000260454.2" "ENSG00000272097.2" "ENSG00000242531.1"
## [226] "ENSG00000229321.2" "ENSG00000258935.1" "ENSG00000235840.1"
## [229] "ENSG00000248645.1" "ENSG00000261555.1" "ENSG00000229751.1"
## [232] "ENSG00000261702.2" "ENSG00000284188.2" "ENSG00000249199.2"
## [235] "ENSG00000221461.2" "ENSG00000251270.1" "ENSG00000261189.1"
## [238] "ENSG00000256452.1" "ENSG00000231024.1" "ENSG00000229673.1"
## [241] "ENSG00000249727.2" "ENSG00000231078.1" "ENSG00000271114.1"
## [244] "ENSG00000226706.1" "ENSG00000283511.1" "ENSG00000228683.1"
## [247] "ENSG00000286320.1" "ENSG00000259579.1" "ENSG00000226927.1"
## [250] "ENSG00000268051.1" "ENSG00000286356.1" "ENSG00000277825.1"
## [253] "ENSG00000277319.1" "ENSG00000217514.1" "ENSG00000273209.1"
## [256] "ENSG00000233633.2" "ENSG00000212589.1" "ENSG00000258598.1"
## [259] "ENSG00000263723.1" "ENSG00000288079.1" "ENSG00000287560.1"
## [262] "ENSG00000279409.1" "ENSG00000267245.1" "ENSG00000235023.1"
## [265] "ENSG00000286963.1" "ENSG00000201584.1" "ENSG00000248521.1"
## [268] "ENSG00000235563.1" "ENSG00000285804.2" "ENSG00000236936.1"
## [271] "ENSG00000274379.1" "ENSG00000283128.2" "ENSG00000260004.1"
## [274] "ENSG00000280217.1" "ENSG00000262966.2" "ENSG00000228345.2"
## [277] "ENSG00000201541.1" "ENSG00000285534.1" "ENSG00000260640.1"
## [280] "ENSG00000287882.1" "ENSG00000286119.1" "ENSG00000287145.1"
## [283] "ENSG00000268080.2" "ENSG00000231951.1" "ENSG00000280354.1"
## [286] "ENSG00000225632.1" "ENSG00000288639.1" "ENSG00000259420.5"
## [289] "ENSG00000261472.1" "ENSG00000286390.1" "ENSG00000212580.1"
## [292] "ENSG00000280222.1" "ENSG00000246792.3" "ENSG00000267658.1"
## [295] "ENSG00000262810.1" "ENSG00000235240.1" "ENSG00000226096.1"
## [298] "ENSG00000251445.1" "ENSG00000249930.1" "ENSG00000237422.2"
## [301] "ENSG00000235096.2" "ENSG00000233096.1" "ENSG00000286526.1"
## [304] "ENSG00000231251.1" "ENSG00000261723.1" "ENSG00000234967.2"
## [307] "ENSG00000254458.1" "ENSG00000244260.2" "ENSG00000249213.1"
## [310] "ENSG00000227920.3" "ENSG00000229618.3" "ENSG00000235886.1"
## [313] "ENSG00000271360.1" "ENSG00000272922.1" "ENSG00000224099.2"
## [316] "ENSG00000234017.1" "ENSG00000261177.1" "ENSG00000261095.1"
## [319] "ENSG00000270531.1" "ENSG00000230385.1" "ENSG00000272715.1"
## [322] "ENSG00000202318.1" "ENSG00000285775.1" "ENSG00000236856.1"
## [325] "ENSG00000224794.2" "ENSG00000232072.1" "ENSG00000224731.1"
## [328] "ENSG00000250192.1" "ENSG00000263033.2" "ENSG00000199196.1"
## [331] "ENSG00000285367.1" "ENSG00000284652.1" "ENSG00000230404.1"
## [334] "ENSG00000286505.1" "ENSG00000224815.3" "ENSG00000254519.5"
## [337] "ENSG00000269919.1" "ENSG00000254002.2" "ENSG00000227775.3"
## [340] "ENSG00000264750.1" "ENSG00000223027.1" "ENSG00000248702.1"
## [343] "ENSG00000287923.1" "ENSG00000248753.1" "ENSG00000280392.1"
## [346] "ENSG00000226965.2" "ENSG00000255446.1" "ENSG00000288615.1"
## [349] "ENSG00000220739.2" "ENSG00000249738.10" "ENSG00000259200.2"
## [352] "ENSG00000286757.2" "ENSG00000203392.3" "ENSG00000253051.1"
## [355] "ENSG00000230233.1" "ENSG00000226398.1" "ENSG00000230647.2"
## [358] "ENSG00000278071.1" "ENSG00000232057.1" "ENSG00000260788.6"
## [361] "ENSG00000272215.1" "ENSG00000279778.1" "ENSG00000286834.1"
## [364] "ENSG00000262116.1" "ENSG00000186244.7" "ENSG00000268926.3"
## [367] "ENSG00000277831.1" "ENSG00000258399.7" "ENSG00000212590.1"
## [370] "ENSG00000253608.2" "ENSG00000197099.8" "ENSG00000267455.1"
## [373] "ENSG00000233969.2" "ENSG00000213076.3" "ENSG00000259202.1"
## [376] "ENSG00000285623.1" "ENSG00000270174.1" "ENSG00000248128.1"
## [379] "ENSG00000271384.1" "ENSG00000287903.1" "ENSG00000256221.1"
## [382] "ENSG00000229607.1" "ENSG00000255021.1" "ENSG00000256708.1"
## [385] "ENSG00000271709.1" "ENSG00000271795.1" "ENSG00000230698.1"
## [388] "ENSG00000249642.2" "ENSG00000287726.1" "ENSG00000228873.1"
## [391] "ENSG00000267579.1" "ENSG00000242444.3" "ENSG00000251010.1"
## [394] "ENSG00000261248.1" "ENSG00000263499.1" "ENSG00000237920.2"
## [397] "ENSG00000280047.1" "ENSG00000286276.1" "ENSG00000255605.1"
## [400] "ENSG00000243116.1" "ENSG00000224593.1" "ENSG00000270927.1"
## [403] "ENSG00000241354.2" "ENSG00000277938.1" "ENSG00000213065.2"
## [406] "ENSG00000255545.8" "ENSG00000228176.1" "ENSG00000260304.1"
## [409] "ENSG00000259868.3" "ENSG00000226571.2" "ENSG00000259989.1"
## [412] "ENSG00000237844.2" "ENSG00000254321.2" "ENSG00000233834.6"
## [415] "ENSG00000266101.1" "ENSG00000254538.1" "ENSG00000271269.1"
## [418] "ENSG00000203446.2" "ENSG00000224414.1" "ENSG00000231057.4"
## [421] "ENSG00000270591.1" "ENSG00000279086.1" "ENSG00000224322.2"
## [424] "ENSG00000283078.1" "ENSG00000226957.1" "ENSG00000258760.1"
## [427] "ENSG00000201393.1" "ENSG00000260404.3" "ENSG00000215450.2"
## [430] "ENSG00000264754.1" "ENSG00000207210.1" "ENSG00000287220.1"
## [433] "ENSG00000199751.1" "ENSG00000270120.1" "ENSG00000287175.1"
## [436] "ENSG00000238503.1" "ENSG00000236389.1" "ENSG00000286101.1"
## [439] "ENSG00000261347.1" "ENSG00000234929.2" "ENSG00000199200.2"
## [442] "ENSG00000287975.1" "ENSG00000275880.1" "ENSG00000226266.6"
## [445] "ENSG00000248733.1" "ENSG00000286948.1" "ENSG00000276332.1"
## [448] "ENSG00000254001.5" "ENSG00000223727.6" "ENSG00000250961.2"
## [451] "ENSG00000234223.2" "ENSG00000280015.1" "ENSG00000228108.1"
## [454] "ENSG00000257726.1" "ENSG00000236466.2" "ENSG00000287387.1"
## [457] "ENSG00000283579.1" "ENSG00000236842.1" "ENSG00000234860.2"
## [460] "ENSG00000260171.1" "ENSG00000258620.2" "ENSG00000253354.2"
## [463] "ENSG00000223967.1" "ENSG00000271475.1" "ENSG00000257258.2"
table(genes$`Higher methylation in hPGCLCs`%in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 9 65
<- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hPGCLCs`, "entrez_gene_id"]
DMG_uphPGCLCsvshiPSCs <- na.omit(DMG_uphPGCLCsvshiPSCs[!DMG_uphPGCLCsvshiPSCs %in% ""])
DMG_uphPGCLCsvshiPSCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hiPSCs`, "entrez_gene_id"]
DMG_downhPGCLCsvshiPSCs <- na.omit(DMG_downhPGCLCsvshiPSCs[!DMG_downhPGCLCsvshiPSCs %in% ""]) DMG_downhPGCLCsvshiPSCs
any(duplicated(DMG_uphPGCLCsvshiPSCs))
## [1] FALSE
any(duplicated(DMG_downhPGCLCsvshiPSCs))
## [1] FALSE
$`Higher methylation in hPGCLCs` <- DMG_uphPGCLCsvshiPSCs
genes$`Higher methylation in hiPSCs` <- DMG_downhPGCLCsvshiPSCs genes
<- compareCluster(geneCluster = genes,
compKEGG fun = "enrichKEGG",
organism="hsa",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
universe = GeneUniverse_entrezid)
There are no significant terms (They have Padj>0.05)
dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
<- compareCluster(geneCluster = genes,
compGO fun = "enrichGO",
keyType = "ENTREZID",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
OrgDb='org.Hs.eg.db',
universe=GeneUniverse_entrezid,
ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")
A new DMRs_annotated
object is created in which
annotation from ChIPseeker
is added.
mcols(DMRs_annotated$hiPSCsvshPGCLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hiPSCsvshPGCLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hiPSCsvshPGCLCs)$ChipSeekerGeneId <- NA
<- findOverlaps(DMRs_annotated$hiPSCsvshPGCLCs, peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)
m mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "geneId"]
<- findOverlaps(DMRs_annotated$hiPSCsvshPGCLCs, peakAnnoList[["Higher methylation in hiPSCs"]]@anno)
m mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hiPSCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hiPSCs"]]@anno)[subjectHits(m), "geneId"]
Adding entrez gene id, gene symbol and gene information
$hiPSCsvshPGCLCs <- as.data.frame(DMRs_annotated$hiPSCsvshPGCLCs)
DMRs_annotated$hiPSCsvshPGCLCs <- dplyr::left_join(DMRs_annotated$hiPSCsvshPGCLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) DMRs_annotated
$hiPSCsvshPGCLCs[!is.na(DMRs_annotated$hiPSCsvshPGCLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
DMRs_annotateddatatable(class = 'hover', rownames = FALSE, caption="DMRs - hiPSCsvshPGCLCs", extension='Buttons', escape = FALSE,
options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE,
buttons=list(c('csv', 'excel'))))
ChIPseeker
DMRs are now annotated to their associated genes and genomic regions
using the annotatePeaks
function of the
ChIPseeker
R package, using a TxDb object. The GenomicFeatures
package uses TxDb
objects to store transcript metadata.
This class maps the 5’ and 3’ untranslated regions (UTRs), protein
coding sequences (CDSs) and exons for a set of mRNA transcripts to their
associated genome. All TxDb
objects are backed by a SQLite
database that manages genomic locations and the relationships between
pre-processed mRNA transcripts, exons, protein coding sequences, and
their related gene identifiers.
TxDb created by us
There are no DMRs where the methylation is higher in iMeLCs with respect to hiPSCs.
<- annotatePeak(DMRs_GRanges$hiPSCsvsiMeLCs, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE) peakAnnoList
::plotAnnoBar(peakAnnoList) ChIPseeker
::plotDistToTSS(peakAnnoList) ChIPseeker
Given a list of gene set, compareCluster
function will
compute profiles of each gene cluster.
= unique(as.data.frame(peakAnnoList)$geneId)
genes names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes))
## [1] FALSE
There are no duplicated ensembl ids
table(genes %in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 21 44
!genes%in% Genes_df$ensembl_gene_id_version]
genes[## <NA> <NA> <NA> <NA>
## "ENSG00000272849.1" "ENSG00000259362.2" "ENSG00000228613.1" "ENSG00000262884.1"
## <NA> <NA> <NA> <NA>
## "ENSG00000271218.1" "ENSG00000284691.1" "ENSG00000238031.1" "ENSG00000203647.2"
## <NA> <NA> <NA> <NA>
## "ENSG00000249526.1" "ENSG00000282952.1" "ENSG00000201026.1" "ENSG00000255585.3"
## <NA> <NA> <NA> <NA>
## "ENSG00000267366.1" "ENSG00000236426.5" "ENSG00000285707.1" "ENSG00000229618.3"
## <NA> <NA> <NA> <NA>
## "ENSG00000246792.3" "ENSG00000230003.2" "ENSG00000286805.1" "ENSG00000229673.1"
## <NA>
## "ENSG00000228361.2"
<- Genes_df[Genes_df$ensembl_gene_id_version %in% genes, "entrez_gene_id"]
DMG_downiMeLCsvshiPSCs <- na.omit(DMG_downiMeLCsvshiPSCs[!DMG_downiMeLCsvshiPSCs %in% ""]) DMG_downiMeLCsvshiPSCs
any(duplicated(DMG_downiMeLCsvshiPSCs))
## [1] FALSE
<- DMG_downiMeLCsvshiPSCs genes
# compKEGG <- compareCluster(geneCluster = genes,
# fun = "enrichKEGG",
# organism="hsa",
# pvalueCutoff = 0.05,
# pAdjustMethod = "BH",
# universe = GeneUniverse_entrezid)
#dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis") #It fails because too few genes.
# compGO <- compareCluster(geneCluster = genes,
# fun = "enrichGO",
# keyType = "ENTREZID",
# pvalueCutoff = 0.05,
# pAdjustMethod = "BH",
# OrgDb='org.Hs.eg.db',
# universe=GeneUniverse_entrezid,
# ont = "ALL")
#dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")
A new DMRs_annotated
object is created in which
annotation from ChIPseeker
is added.
mcols(DMRs_annotated$hiPSCsvsiMeLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hiPSCsvsiMeLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hiPSCsvsiMeLCs)$ChipSeekerGeneId <- NA
<- findOverlaps(DMRs_annotated$hiPSCsvsiMeLCs, peakAnnoList@anno)
m mcols(DMRs_annotated$hiPSCsvsiMeLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hiPSCsvsiMeLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hiPSCsvsiMeLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList@anno)[subjectHits(m), "geneId"]
Adding entrez gene id, gene symbol and gene information
$hiPSCsvsiMeLCs <- as.data.frame(DMRs_annotated$hiPSCsvsiMeLCs)
DMRs_annotated$hiPSCsvsiMeLCs <- dplyr::left_join(DMRs_annotated$hiPSCsvsiMeLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) DMRs_annotated
$hiPSCsvsiMeLCs[!is.na(DMRs_annotated$hiPSCsvsiMeLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
DMRs_annotateddatatable(class = 'hover', rownames = FALSE, caption="DMRs - hiPSCsvsiMeLCs", extension='Buttons', escape = FALSE,
options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE,
buttons=list(c('csv', 'excel'))))
ChIPseeker
DMRs are now annotated to their associated genes and genomic regions
using the annotatePeaks
function of the
ChIPseeker
R package, using a TxDb object. The GenomicFeatures
package uses TxDb
objects to store transcript metadata.
This class maps the 5’ and 3’ untranslated regions (UTRs), protein
coding sequences (CDSs) and exons for a set of mRNA transcripts to their
associated genome. All TxDb
objects are backed by a SQLite
database that manages genomic locations and the relationships between
pre-processed mRNA transcripts, exons, protein coding sequences, and
their related gene identifiers.
TxDb created by us
<- lapply(list("Higher methylation in hPGCLCs" = DMRs_GRanges$iMeLCsvshPGCLCs[DMRs_GRanges$iMeLCsvshPGCLCs$diff.Methy<0,], "Higher methylation in iMeLCs" = DMRs_GRanges$iMeLCsvshPGCLCs[DMRs_GRanges$iMeLCsvshPGCLCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE) peakAnnoList
::plotAnnoBar(peakAnnoList) ChIPseeker
::plotDistToTSS(peakAnnoList) ChIPseeker
Given a list of gene set, compareCluster
function will
compute profiles of each gene cluster.
= lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
genes names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hPGCLCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in iMeLCs`))
## [1] FALSE
There are no duplicated ensembl ids
table(genes$`Higher methylation in iMeLCs` %in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 284 960
$`Higher methylation in iMeLCs` [!genes$`Higher methylation in iMeLCs` %in% Genes_df$ensembl_gene_id_version]
genes## [1] "ENSG00000225339.4" "ENSG00000224079.1" "ENSG00000275178.1"
## [4] "ENSG00000288549.1" "ENSG00000275088.1" "ENSG00000253395.1"
## [7] "ENSG00000233052.1" "ENSG00000267665.2" "ENSG00000278716.1"
## [10] "ENSG00000282718.1" "ENSG00000286295.1" "ENSG00000280326.1"
## [13] "ENSG00000237326.1" "ENSG00000285163.1" "ENSG00000285855.1"
## [16] "ENSG00000263017.1" "ENSG00000230676.1" "ENSG00000286535.1"
## [19] "ENSG00000287823.1" "ENSG00000279412.1" "ENSG00000254755.1"
## [22] "ENSG00000237126.8" "ENSG00000287428.1" "ENSG00000215381.3"
## [25] "ENSG00000276166.1" "ENSG00000267036.2" "ENSG00000234275.4"
## [28] "ENSG00000241449.6" "ENSG00000272988.1" "ENSG00000231113.2"
## [31] "ENSG00000262652.1" "ENSG00000233891.8" "ENSG00000249808.3"
## [34] "ENSG00000230526.1" "ENSG00000176515.1" "ENSG00000279065.1"
## [37] "ENSG00000256564.1" "ENSG00000237372.4" "ENSG00000234793.1"
## [40] "ENSG00000259341.2" "ENSG00000279931.1" "ENSG00000255845.1"
## [43] "ENSG00000199949.2" "ENSG00000286240.1" "ENSG00000287787.1"
## [46] "ENSG00000237371.1" "ENSG00000267090.1" "ENSG00000279711.1"
## [49] "ENSG00000286364.1" "ENSG00000245688.1" "ENSG00000257883.1"
## [52] "ENSG00000270911.1" "ENSG00000268750.7" "ENSG00000287338.1"
## [55] "ENSG00000250577.1" "ENSG00000285773.1" "ENSG00000228154.3"
## [58] "ENSG00000226409.3" "ENSG00000230163.1" "ENSG00000287161.1"
## [61] "ENSG00000272849.1" "ENSG00000280444.1" "ENSG00000286333.1"
## [64] "ENSG00000253543.1" "ENSG00000279705.1" "ENSG00000287046.1"
## [67] "ENSG00000280004.1" "ENSG00000270972.1" "ENSG00000230990.2"
## [70] "ENSG00000238010.1" "ENSG00000261938.1" "ENSG00000261976.2"
## [73] "ENSG00000269667.2" "ENSG00000286286.1" "ENSG00000234428.2"
## [76] "ENSG00000254933.1" "ENSG00000253445.1" "ENSG00000278330.1"
## [79] "ENSG00000253307.1" "ENSG00000258698.1" "ENSG00000216775.3"
## [82] "ENSG00000203739.4" "ENSG00000286610.1" "ENSG00000249236.2"
## [85] "ENSG00000253140.2" "ENSG00000231539.1" "ENSG00000256481.1"
## [88] "ENSG00000250230.3" "ENSG00000226047.1" "ENSG00000227527.2"
## [91] "ENSG00000235994.4" "ENSG00000273160.1" "ENSG00000254777.5"
## [94] "ENSG00000287073.1" "ENSG00000286607.1" "ENSG00000229628.1"
## [97] "ENSG00000279261.2" "ENSG00000248962.1" "ENSG00000287235.1"
## [100] "ENSG00000287089.1" "ENSG00000253397.1" "ENSG00000236676.1"
## [103] "ENSG00000219133.2" "ENSG00000254111.3" "ENSG00000260185.1"
## [106] "ENSG00000259920.1" "ENSG00000279076.1" "ENSG00000225028.1"
## [109] "ENSG00000225315.2" "ENSG00000254559.1" "ENSG00000232408.1"
## [112] "ENSG00000254290.1" "ENSG00000260473.2" "ENSG00000250576.1"
## [115] "ENSG00000262445.3" "ENSG00000272097.2" "ENSG00000201042.1"
## [118] "ENSG00000238031.1" "ENSG00000248645.1" "ENSG00000287756.1"
## [121] "ENSG00000262869.1" "ENSG00000256452.1" "ENSG00000261606.5"
## [124] "ENSG00000280270.1" "ENSG00000258560.1" "ENSG00000250007.7"
## [127] "ENSG00000231876.8" "ENSG00000275155.1" "ENSG00000272922.1"
## [130] "ENSG00000232716.2" "ENSG00000267245.1" "ENSG00000242531.1"
## [133] "ENSG00000279526.1" "ENSG00000221461.2" "ENSG00000258935.1"
## [136] "ENSG00000235538.3" "ENSG00000251270.1" "ENSG00000226542.1"
## [139] "ENSG00000254205.1" "ENSG00000280354.1" "ENSG00000286635.1"
## [142] "ENSG00000201549.1" "ENSG00000225187.1" "ENSG00000287822.1"
## [145] "ENSG00000249727.2" "ENSG00000256955.2" "ENSG00000259579.1"
## [148] "ENSG00000212589.1" "ENSG00000226087.2" "ENSG00000226868.1"
## [151] "ENSG00000225092.2" "ENSG00000250338.1" "ENSG00000271114.1"
## [154] "ENSG00000217514.1" "ENSG00000233633.2" "ENSG00000286864.1"
## [157] "ENSG00000261702.2" "ENSG00000261095.1" "ENSG00000229321.2"
## [160] "ENSG00000234949.2" "ENSG00000277319.1" "ENSG00000231482.3"
## [163] "ENSG00000283128.2" "ENSG00000254789.3" "ENSG00000226927.1"
## [166] "ENSG00000286641.1" "ENSG00000243491.1" "ENSG00000263723.1"
## [169] "ENSG00000235840.1" "ENSG00000269919.1" "ENSG00000280217.1"
## [172] "ENSG00000201196.1" "ENSG00000226706.1" "ENSG00000201584.1"
## [175] "ENSG00000274379.1" "ENSG00000287882.1" "ENSG00000276476.3"
## [178] "ENSG00000279409.1" "ENSG00000261723.1" "ENSG00000202318.1"
## [181] "ENSG00000201541.1" "ENSG00000234378.1" "ENSG00000260454.2"
## [184] "ENSG00000287145.1" "ENSG00000262810.1" "ENSG00000248702.1"
## [187] "ENSG00000235023.1" "ENSG00000260788.6" "ENSG00000227920.3"
## [190] "ENSG00000226096.1" "ENSG00000227775.3" "ENSG00000235096.2"
## [193] "ENSG00000254364.1" "ENSG00000224545.1" "ENSG00000254458.1"
## [196] "ENSG00000272715.1" "ENSG00000272485.1" "ENSG00000259200.2"
## [199] "ENSG00000203392.3" "ENSG00000261177.1" "ENSG00000280222.1"
## [202] "ENSG00000261472.1" "ENSG00000288079.1" "ENSG00000278071.1"
## [205] "ENSG00000288639.1" "ENSG00000248359.2" "ENSG00000259961.1"
## [208] "ENSG00000263508.6" "ENSG00000288615.1" "ENSG00000285623.1"
## [211] "ENSG00000224731.1" "ENSG00000236800.1" "ENSG00000253051.1"
## [214] "ENSG00000241354.2" "ENSG00000273209.1" "ENSG00000230404.1"
## [217] "ENSG00000228345.2" "ENSG00000230233.1" "ENSG00000286356.1"
## [220] "ENSG00000186244.7" "ENSG00000235886.1" "ENSG00000233981.1"
## [223] "ENSG00000212580.1" "ENSG00000197099.8" "ENSG00000284652.1"
## [226] "ENSG00000248128.1" "ENSG00000285367.1" "ENSG00000270174.1"
## [229] "ENSG00000253608.2" "ENSG00000267658.1" "ENSG00000248521.1"
## [232] "ENSG00000271269.1" "ENSG00000249930.1" "ENSG00000258399.7"
## [235] "ENSG00000272215.1" "ENSG00000268926.3" "ENSG00000232072.1"
## [238] "ENSG00000230647.2" "ENSG00000234017.1" "ENSG00000255545.8"
## [241] "ENSG00000255021.1" "ENSG00000285775.1" "ENSG00000228613.1"
## [244] "ENSG00000229607.1" "ENSG00000267579.1" "ENSG00000260004.1"
## [247] "ENSG00000286276.1" "ENSG00000226169.1" "ENSG00000213076.3"
## [250] "ENSG00000199751.1" "ENSG00000250409.1" "ENSG00000280015.1"
## [253] "ENSG00000259252.1" "ENSG00000266101.1" "ENSG00000254321.2"
## [256] "ENSG00000207210.1" "ENSG00000258760.1" "ENSG00000237920.2"
## [259] "ENSG00000287903.1" "ENSG00000248733.1" "ENSG00000199200.2"
## [262] "ENSG00000256708.1" "ENSG00000270120.1" "ENSG00000226398.1"
## [265] "ENSG00000254538.1" "ENSG00000254607.2" "ENSG00000270591.1"
## [268] "ENSG00000276332.1" "ENSG00000285534.1" "ENSG00000226965.2"
## [271] "ENSG00000277831.1" "ENSG00000236466.2" "ENSG00000238503.1"
## [274] "ENSG00000270927.1" "ENSG00000271795.1" "ENSG00000255605.1"
## [277] "ENSG00000242444.3" "ENSG00000226266.6" "ENSG00000203446.2"
## [280] "ENSG00000280047.1" "ENSG00000277050.1" "ENSG00000286948.1"
## [283] "ENSG00000234860.2" "ENSG00000223727.6"
table(genes$`Higher methylation in hPGCLCs`%in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 8 63
<- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hPGCLCs`, "entrez_gene_id"]
DMG_uphPGCLCsvsiMeLCs <- na.omit(DMG_uphPGCLCsvsiMeLCs[!DMG_uphPGCLCsvsiMeLCs %in% ""])
DMG_uphPGCLCsvsiMeLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in iMeLCs`, "entrez_gene_id"]
DMG_downhPGCLCsvsiMeLCs <- na.omit(DMG_downhPGCLCsvsiMeLCs[!DMG_downhPGCLCsvsiMeLCs %in% ""]) DMG_downhPGCLCsvsiMeLCs
any(duplicated(DMG_uphPGCLCsvsiMeLCs))
## [1] FALSE
any(duplicated(DMG_downhPGCLCsvsiMeLCs))
## [1] FALSE
$`Higher methylation in hPGCLCs` <- DMG_uphPGCLCsvsiMeLCs
genes$`Higher methylation in iMeLCs` <- DMG_downhPGCLCsvsiMeLCs genes
<- compareCluster(geneCluster = genes,
compKEGG fun = "enrichKEGG",
organism="hsa",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
universe = GeneUniverse_entrezid)
## Warning in compareCluster(geneCluster = genes, fun = "enrichKEGG", organism =
## "hsa", : No enrichment found in any of gene cluster, please check your input...
There are no significant terms (They have Padj>0.05)
if (!is.null(compKEGG)) {
dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
}
<- compareCluster(geneCluster = genes,
compGO fun = "enrichGO",
keyType = "ENTREZID",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
OrgDb='org.Hs.eg.db',
universe=GeneUniverse_entrezid,
ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")
A new DMRs_annotated
object is created in which
annotation from ChIPseeker
is added.
mcols(DMRs_annotated$iMeLCsvshPGCLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$iMeLCsvshPGCLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$iMeLCsvshPGCLCs)$ChipSeekerGeneId <- NA
<- findOverlaps(DMRs_annotated$iMeLCsvshPGCLCs, peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)
m mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hPGCLCs"]]@anno)[subjectHits(m), "geneId"]
<- findOverlaps(DMRs_annotated$iMeLCsvshPGCLCs, peakAnnoList[["Higher methylation in iMeLCs"]]@anno)
m mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$iMeLCsvshPGCLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "geneId"]
Adding entrez gene id, gene symbol and gene information
$iMeLCsvshPGCLCs <- as.data.frame(DMRs_annotated$iMeLCsvshPGCLCs)
DMRs_annotated$iMeLCsvshPGCLCs <- dplyr::left_join(DMRs_annotated$iMeLCsvshPGCLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) DMRs_annotated
$iMeLCsvshPGCLCs[!is.na(DMRs_annotated$iMeLCsvshPGCLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
DMRs_annotateddatatable(class = 'hover', rownames = FALSE, caption="DMRs - iMeLCsvshPGCLCs", extension='Buttons', escape = FALSE,
options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE,
buttons=list(c('csv', 'excel'))))
ChIPseeker
DMRs are now annotated to their associated genes and genomic regions
using the annotatePeaks
function of the
ChIPseeker
R package, using a TxDb object. The GenomicFeatures
package uses TxDb
objects to store transcript metadata.
This class maps the 5’ and 3’ untranslated regions (UTRs), protein
coding sequences (CDSs) and exons for a set of mRNA transcripts to their
associated genome. All TxDb
objects are backed by a SQLite
database that manages genomic locations and the relationships between
pre-processed mRNA transcripts, exons, protein coding sequences, and
their related gene identifiers.
TxDb created by us
<- lapply(list("Higher methylation in iMeLCs" = DMRs_GRanges$hEGCLCsvsiMeLCs[DMRs_GRanges$hEGCLCsvsiMeLCs$diff.Methy<0,], "Higher methylation in hEGCLCs" = DMRs_GRanges$hEGCLCsvsiMeLCs[DMRs_GRanges$hEGCLCsvsiMeLCs$diff.Methy>0,]), annotatePeak, TxDb=txdb_v35, tssRegion=c(-3000, 3000), verbose=FALSE) peakAnnoList
::plotAnnoBar(peakAnnoList) ChIPseeker
::plotDistToTSS(peakAnnoList) ChIPseeker
Given a list of gene set, compareCluster
function will
compute profiles of each gene cluster.
= lapply(peakAnnoList, function(i) unique(as.data.frame(i)$geneId))
genes names(genes) = sub("_", "\n", names(genes))
any(duplicated(genes$`Higher methylation in hEGCLCs`))
## [1] FALSE
any(duplicated(genes$`Higher methylation in iMeLCs`))
## [1] FALSE
There are no duplicated ensembl ids
table(genes$`Higher methylation in iMeLCs` %in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 162 528
$`Higher methylation in iMeLCs` [!genes$`Higher methylation in iMeLCs` %in% Genes_df$ensembl_gene_id_version]
genes## [1] "ENSG00000274219.1" "ENSG00000261842.1" "ENSG00000240449.1"
## [4] "ENSG00000278000.1" "ENSG00000279031.1" "ENSG00000280353.1"
## [7] "ENSG00000251196.1" "ENSG00000279611.1" "ENSG00000270889.1"
## [10] "ENSG00000241014.2" "ENSG00000274177.1" "ENSG00000286717.1"
## [13] "ENSG00000285833.1" "ENSG00000233820.2" "ENSG00000267713.1"
## [16] "ENSG00000221093.1" "ENSG00000269388.1" "ENSG00000259607.1"
## [19] "ENSG00000287002.1" "ENSG00000287570.1" "ENSG00000261675.1"
## [22] "ENSG00000283075.1" "ENSG00000250900.6" "ENSG00000235538.3"
## [25] "ENSG00000230285.1" "ENSG00000275201.1" "ENSG00000229402.1"
## [28] "ENSG00000259789.2" "ENSG00000240882.1" "ENSG00000237661.1"
## [31] "ENSG00000275833.1" "ENSG00000253942.1" "ENSG00000217702.3"
## [34] "ENSG00000103200.6" "ENSG00000233052.1" "ENSG00000251250.3"
## [37] "ENSG00000230699.2" "ENSG00000274977.1" "ENSG00000225489.7"
## [40] "ENSG00000267202.5" "ENSG00000260509.2" "ENSG00000268230.5"
## [43] "ENSG00000255535.2" "ENSG00000287060.1" "ENSG00000285712.1"
## [46] "ENSG00000259983.1" "ENSG00000267341.1" "ENSG00000243818.5"
## [49] "ENSG00000233332.1" "ENSG00000272915.1" "ENSG00000285329.1"
## [52] "ENSG00000278722.1" "ENSG00000269289.6" "ENSG00000267786.1"
## [55] "ENSG00000280282.1" "ENSG00000230537.2" "ENSG00000267353.1"
## [58] "ENSG00000261460.1" "ENSG00000230936.1" "ENSG00000287742.1"
## [61] "ENSG00000277878.1" "ENSG00000268886.1" "ENSG00000288571.1"
## [64] "ENSG00000225868.7" "ENSG00000229196.4" "ENSG00000238277.1"
## [67] "ENSG00000261651.1" "ENSG00000249149.2" "ENSG00000251211.1"
## [70] "ENSG00000287486.1" "ENSG00000250105.1" "ENSG00000229679.1"
## [73] "ENSG00000261090.2" "ENSG00000261720.1" "ENSG00000234104.1"
## [76] "ENSG00000285829.1" "ENSG00000276687.1" "ENSG00000287744.1"
## [79] "ENSG00000226659.1" "ENSG00000204850.4" "ENSG00000259394.2"
## [82] "ENSG00000213150.2" "ENSG00000266283.1" "ENSG00000267040.6"
## [85] "ENSG00000286697.1" "ENSG00000255240.6" "ENSG00000270625.1"
## [88] "ENSG00000250971.3" "ENSG00000250523.1" "ENSG00000235939.1"
## [91] "ENSG00000226426.1" "ENSG00000230033.1" "ENSG00000256695.2"
## [94] "ENSG00000225106.1" "ENSG00000259807.2" "ENSG00000267648.1"
## [97] "ENSG00000276633.1" "ENSG00000214210.4" "ENSG00000230309.1"
## [100] "ENSG00000286407.1" "ENSG00000288561.1" "ENSG00000272688.1"
## [103] "ENSG00000267558.2" "ENSG00000254877.1" "ENSG00000272203.1"
## [106] "ENSG00000280318.1" "ENSG00000261076.1" "ENSG00000197099.8"
## [109] "ENSG00000286046.1" "ENSG00000229312.2" "ENSG00000285228.1"
## [112] "ENSG00000279578.1" "ENSG00000276476.3" "ENSG00000233247.3"
## [115] "ENSG00000224352.1" "ENSG00000146722.11" "ENSG00000259520.6"
## [118] "ENSG00000205584.6" "ENSG00000174977.8" "ENSG00000285741.1"
## [121] "ENSG00000256029.6" "ENSG00000270891.1" "ENSG00000229299.2"
## [124] "ENSG00000283633.1" "ENSG00000224765.1" "ENSG00000274397.1"
## [127] "ENSG00000202459.1" "ENSG00000262668.2" "ENSG00000274660.1"
## [130] "ENSG00000237947.1" "ENSG00000258385.1" "ENSG00000257512.1"
## [133] "ENSG00000230964.1" "ENSG00000234535.1" "ENSG00000268677.1"
## [136] "ENSG00000261213.1" "ENSG00000259010.1" "ENSG00000270131.1"
## [139] "ENSG00000228532.4" "ENSG00000286995.1" "ENSG00000275649.1"
## [142] "ENSG00000284734.1" "ENSG00000256673.1" "ENSG00000255182.2"
## [145] "ENSG00000224273.2" "ENSG00000255801.1" "ENSG00000188525.4"
## [148] "ENSG00000237525.7" "ENSG00000285553.1" "ENSG00000232328.3"
## [151] "ENSG00000253028.1" "ENSG00000238110.1" "ENSG00000253633.2"
## [154] "ENSG00000257657.3" "ENSG00000258422.5" "ENSG00000255910.2"
## [157] "ENSG00000207176.1" "ENSG00000264149.1" "ENSG00000253585.1"
## [160] "ENSG00000278078.1" "ENSG00000288253.1" "ENSG00000227459.1"
table(genes$`Higher methylation in hEGCLCs`%in% Genes_df$ensembl_gene_id_version)
##
## FALSE TRUE
## 27 61
<- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in hEGCLCs`, "entrez_gene_id"]
DMG_uphEGCLCsvsiMeLCs <- na.omit(DMG_uphEGCLCsvsiMeLCs[!DMG_uphEGCLCsvsiMeLCs %in% ""])
DMG_uphEGCLCsvsiMeLCs <- Genes_df[Genes_df$ensembl_gene_id_version %in% genes$`Higher methylation in iMeLCs`, "entrez_gene_id"]
DMG_downhEGCLCsvsiMeLCs <- na.omit(DMG_downhEGCLCsvsiMeLCs[!DMG_downhEGCLCsvsiMeLCs %in% ""]) DMG_downhEGCLCsvsiMeLCs
any(duplicated(DMG_uphEGCLCsvsiMeLCs))
## [1] FALSE
any(duplicated(DMG_downhEGCLCsvsiMeLCs))
## [1] FALSE
$`Higher methylation in hEGCLCs` <- DMG_uphEGCLCsvsiMeLCs
genes$`Higher methylation in iMeLCs` <- DMG_downhEGCLCsvsiMeLCs genes
<- compareCluster(geneCluster = genes,
compKEGG fun = "enrichKEGG",
organism="hsa",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
universe = GeneUniverse_entrezid)
## Warning in compareCluster(geneCluster = genes, fun = "enrichKEGG", organism =
## "hsa", : No enrichment found in any of gene cluster, please check your input...
There are no significant terms (They have Padj>0.05)
if (!is.null(compKEGG)) {
dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
}
<- compareCluster(geneCluster = genes,
compGO fun = "enrichGO",
keyType = "ENTREZID",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
OrgDb='org.Hs.eg.db',
universe=GeneUniverse_entrezid,
ont = "ALL")
dotplot(compGO, showCategory = 15, title = "GO Enrichment Analysis")
A new DMRs_annotated
object is created in which
annotation from ChIPseeker
is added.
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)$ChipSeekerAnn <- NA
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)$ChipSeekerDistanceToTss <- NA
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)$ChipSeekerGeneId <- NA
<- findOverlaps(DMRs_annotated$hEGCLCsvsiMeLCs, peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)
m mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in hEGCLCs"]]@anno)[subjectHits(m), "geneId"]
<- findOverlaps(DMRs_annotated$hEGCLCsvsiMeLCs, peakAnnoList[["Higher methylation in iMeLCs"]]@anno)
m mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerAnn"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "annotation"]
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerDistanceToTss"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "distanceToTSS"]
mcols(DMRs_annotated$hEGCLCsvsiMeLCs)[queryHits(m), "ChipSeekerGeneId"] <- mcols(peakAnnoList[["Higher methylation in iMeLCs"]]@anno)[subjectHits(m), "geneId"]
Adding entrez gene id, gene symbol and gene information
$hEGCLCsvsiMeLCs <- as.data.frame(DMRs_annotated$hEGCLCsvsiMeLCs)
DMRs_annotated$hEGCLCsvsiMeLCs <- dplyr::left_join(DMRs_annotated$hEGCLCsvsiMeLCs, Genes_df, by=c('ChipSeekerGeneId' = 'ensembl_gene_id_version')) DMRs_annotated
$hEGCLCsvsiMeLCs[!is.na(DMRs_annotated$hEGCLCsvsiMeLCs$hgnc_symbol),] %>% select(-c("ChipSeekerDistanceToTss", "ChipSeekerGeneId", "ensembl_gene_id", "entrez_gene_id")) %>% distinct() %>%
DMRs_annotateddatatable(class = 'hover', rownames = FALSE, caption="DMRs - iMeLCsvshEGCLCs", extension='Buttons', escape = FALSE,
options = list(pageLength=20, dom='Bfrtip', autoWidth=TRUE,
buttons=list(c('csv', 'excel'))))
saveRDS(Genes_df, paste0(OutputFolder, '/', 'GeneUniverse.rds'))
saveRDS(DMRs_annotated, paste0(OutputFolder, '/', 'DMRsAnnotated_final.rds'))
<- sessionInfo()
SessionInfo <- date() Date
Date## [1] "Thu Jan 9 19:24:24 2025"
SessionInfo## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] data.table_1.14.8 DT_0.28
## [3] annotatr_1.24.0 dmrseq_1.18.1
## [5] clusterProfiler_4.6.2 ReactomePA_1.42.0
## [7] org.Hs.eg.db_3.16.0 AnnotationDbi_1.60.2
## [9] ChIPseeker_1.34.1 gridExtra_2.3
## [11] scales_1.2.1 plotly_4.10.2
## [13] ggplot2_3.4.2 DSS_2.46.0
## [15] bsseq_1.34.0 SummarizedExperiment_1.28.0
## [17] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [19] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
## [21] IRanges_2.32.0 S4Vectors_0.36.2
## [23] BiocParallel_1.32.6 Biobase_2.58.0
## [25] BiocGenerics_0.44.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3
## [2] R.utils_2.12.2
## [3] tidyselect_1.2.0
## [4] RSQLite_2.3.1
## [5] htmlwidgets_1.6.2
## [6] grid_4.2.1
## [7] scatterpie_0.1.8
## [8] munsell_0.5.0
## [9] codetools_0.2-19
## [10] withr_2.5.0
## [11] colorspace_2.1-0
## [12] GOSemSim_2.24.0
## [13] filelock_1.0.2
## [14] highr_0.10
## [15] knitr_1.43
## [16] rstudioapi_0.15.0
## [17] DOSE_3.24.2
## [18] labeling_0.4.2
## [19] GenomeInfoDbData_1.2.9
## [20] polyclip_1.10-4
## [21] bit64_4.0.5
## [22] farver_2.1.1
## [23] rhdf5_2.42.1
## [24] downloader_0.4
## [25] vctrs_0.6.3
## [26] treeio_1.23.1
## [27] generics_0.1.3
## [28] gson_0.1.0
## [29] xfun_0.39
## [30] BiocFileCache_2.6.1
## [31] regioneR_1.30.0
## [32] R6_2.5.1
## [33] graphlayouts_0.8.4
## [34] locfit_1.5-9.7
## [35] bitops_1.0-7
## [36] rhdf5filters_1.10.1
## [37] cachem_1.0.8
## [38] fgsea_1.24.0
## [39] gridGraphics_0.5-1
## [40] DelayedArray_0.24.0
## [41] promises_1.2.0.1
## [42] BiocIO_1.8.0
## [43] ggraph_2.1.0
## [44] enrichplot_1.18.4
## [45] gtable_0.3.3
## [46] tidygraph_1.2.3
## [47] rlang_1.1.1
## [48] splines_4.2.1
## [49] rtracklayer_1.58.0
## [50] lazyeval_0.2.2
## [51] BiocManager_1.30.20
## [52] yaml_2.3.7
## [53] reshape2_1.4.4
## [54] crosstalk_1.2.0
## [55] GenomicFeatures_1.50.4
## [56] httpuv_1.6.11
## [57] qvalue_2.30.0
## [58] tools_4.2.1
## [59] ggplotify_0.1.0
## [60] ellipsis_0.3.2
## [61] gplots_3.1.3
## [62] jquerylib_0.1.4
## [63] RColorBrewer_1.1-3
## [64] Rcpp_1.0.11
## [65] plyr_1.8.8
## [66] sparseMatrixStats_1.10.0
## [67] progress_1.2.2
## [68] zlibbioc_1.44.0
## [69] purrr_1.0.1
## [70] RCurl_1.98-1.12
## [71] prettyunits_1.1.1
## [72] viridis_0.6.2
## [73] bumphunter_1.40.0
## [74] cowplot_1.1.1
## [75] ggrepel_0.9.3
## [76] magrittr_2.0.3
## [77] reactome.db_1.82.0
## [78] xtable_1.8-4
## [79] mime_0.12
## [80] hms_1.1.3
## [81] patchwork_1.1.2
## [82] evaluate_0.21
## [83] HDO.db_0.99.1
## [84] XML_3.99-0.14
## [85] compiler_4.2.1
## [86] biomaRt_2.54.1
## [87] tibble_3.2.1
## [88] KernSmooth_2.23-22
## [89] crayon_1.5.2
## [90] shadowtext_0.1.2
## [91] R.oo_1.25.0
## [92] htmltools_0.5.5
## [93] tzdb_0.4.0
## [94] later_1.3.1
## [95] ggfun_0.0.9
## [96] tidyr_1.3.0
## [97] aplot_0.1.10
## [98] DBI_1.1.3
## [99] tweenr_2.0.2
## [100] dbplyr_2.3.3
## [101] MASS_7.3-60
## [102] rappdirs_0.3.3
## [103] boot_1.3-28.1
## [104] readr_2.1.4
## [105] Matrix_1.6-0
## [106] permute_0.9-7
## [107] cli_3.6.1
## [108] R.methodsS3_1.8.2
## [109] igraph_1.5.0
## [110] pkgconfig_2.0.3
## [111] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [112] GenomicAlignments_1.34.1
## [113] foreach_1.5.2
## [114] xml2_1.3.5
## [115] ggtree_3.6.2
## [116] bslib_0.5.0
## [117] rngtools_1.5.2
## [118] XVector_0.38.0
## [119] doRNG_1.8.6
## [120] yulab.utils_0.0.6
## [121] stringr_1.5.0
## [122] digest_0.6.33
## [123] graph_1.76.0
## [124] Biostrings_2.66.0
## [125] rmarkdown_2.23
## [126] fastmatch_1.1-3
## [127] tidytree_0.4.2
## [128] DelayedMatrixStats_1.20.0
## [129] restfulr_0.0.15
## [130] curl_5.0.1
## [131] shiny_1.7.4.1
## [132] Rsamtools_2.14.0
## [133] gtools_3.9.4
## [134] graphite_1.44.0
## [135] rjson_0.2.21
## [136] outliers_0.15
## [137] lifecycle_1.0.3
## [138] nlme_3.1-162
## [139] jsonlite_1.8.7
## [140] Rhdf5lib_1.20.0
## [141] viridisLite_0.4.2
## [142] limma_3.54.2
## [143] BSgenome_1.66.3
## [144] fansi_1.0.4
## [145] pillar_1.9.0
## [146] lattice_0.21-8
## [147] KEGGREST_1.38.0
## [148] fastmap_1.1.1
## [149] httr_1.4.6
## [150] plotrix_3.8-2
## [151] GO.db_3.16.0
## [152] interactiveDisplayBase_1.36.0
## [153] glue_1.6.2
## [154] iterators_1.0.14
## [155] png_0.1-8
## [156] BiocVersion_3.16.0
## [157] bit_4.0.5
## [158] ggforce_0.4.1
## [159] stringi_1.7.12
## [160] sass_0.4.7
## [161] HDF5Array_1.26.0
## [162] blob_1.2.4
## [163] AnnotationHub_3.6.0
## [164] caTools_1.18.2
## [165] memoise_2.0.1
## [166] dplyr_1.1.2
## [167] ape_5.7-1