for (i in 1:length(params))
print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset - Value: FetalVsOrganoidsChronic - Class: character"
## [1] "Parameter: topTagFile - Value: Data/AllSEcorrected.RData - Class: character"
## [1] "Parameter: FDRTr - Value: 0.05 - Class: numeric"
## [1] "Parameter: LogFCTr - Value: 0.5 - Class: numeric"
## [1] "Parameter: OutputFolder - Value: Data/FunctionalAnalysis/FetalVsOrganoidsMixN/ - Class: character"
## [1] "Parameter: GOSeq - Value: Yes - Class: character"
## [1] "Parameter: TopGO - Value: Yes - Class: character"
## [1] "Parameter: Camera - Value: Yes - Class: character"
## [1] "Parameter: Gsea - Value: Yes - Class: character"
## [1] "Parameter: GseaPvalSel - Value: 0.05 - Class: numeric"
<- params$Dataset
Dataset <- params$LogFCTr
LogFCTh <- params$FDRTr
FDRTh <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder)
OutputFolder
if (dir.exists(OutputFolder) == FALSE) {
dir.create(OutputFolder, recursive=FALSE)
}
if (dir.exists(paste0(OutputFolder, 'TopGO')) == FALSE) {
dir.create(paste0(OutputFolder, 'TopGO'), recursive=FALSE)
}
if (dir.exists(paste0(OutputFolder, 'Gsea')) == FALSE) {
dir.create(paste0(OutputFolder, 'Gsea'), recursive=FALSE)
}
library(stringr)
library(DT)
library(ggplot2)
library(ggrepel)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
library(org.Hs.eg.db)
##
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
library(RColorBrewer)
library(topGO)
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:stringr':
##
## boundary
## Loading required package: GO.db
##
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
library(fgsea)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
##
## expand
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:graph':
##
## union
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
options(stringsAsFactors=FALSE)
source('Functions/EDC_Functions.R')
source('Functions/CriFormatted.R')
source('Functions/goseq.R')
upload the result if differential expression analysis after the clustering of MixN DEGs between Organoids and Fetal.
load(params$topTagFile, verbose=TRUE)
## Loading objects:
## SEs
## DEAs
load("Data/DEGsCluster.RData", verbose = T)
## Loading objects:
## dc
Gene ontology enrichment analysis is performed on the sets of genes found from the cluster analysis between fetal and organoids, using TopGO with Fisher statistics and weight01 algorithm.
I generate vectors for the gene universe, all modulated genes, up-regulated genes and down-regulated genes in the format required by TopGO.
<- unique(union(rownames(DEAs$chronic.org),rownames(DEAs$chronic.fetal)))
UniverseGenes <- unique(union(dc$up.both,dc$down.both))
DEG <- unique(dc$up.both)
DEGup <- unique(dc$down.both)
DEGdown <- unique(union(union(dc$only_org,dc$only_fetal),dc$discordant))
DEGDiscordant
# generation of named vectors in the format required by TopGO
<- list()
GeneVectors $DEGenes <- factor(as.integer(UniverseGenes%in%DEG))
GeneVectorsnames(GeneVectors$DEGenes) <- UniverseGenes
$DEGenesDown <- factor(as.integer(UniverseGenes%in%DEGdown))
GeneVectorsnames(GeneVectors$DEGenesDown) <- UniverseGenes
$DEGenesUp <- factor(as.integer(UniverseGenes%in%DEGup))
GeneVectorsnames(GeneVectors$DEGenesUp) <- UniverseGenes
$DEGenesDiscordant <- factor(as.integer(UniverseGenes%in%DEGDiscordant))
GeneVectorsnames(GeneVectors$DEGenesDiscordant) <- UniverseGenes
Therefore:
<- ifelse(params$TopGO=='Yes', TRUE, FALSE)
BpEval # the analysis is not done if the number of DEGs is lower than 2.
<- ifelse(table(GeneVectors$DEGenes)['1'] < 2 | is.na(table(GeneVectors$DEGenes)['1']) | table(GeneVectors$DEGenesDown)['1'] < 2 | is.na(table(GeneVectors$DEGenesDown)['1']) | table(GeneVectors$DEGenesUp)['1'] < 2 | is.na(table(GeneVectors$DEGenesUp)['1'])| table(GeneVectors$DEGenesDiscordant)['1'] < 2 | is.na(table(GeneVectors$DEGenesDiscordant)['1']), FALSE, BpEval) BpEval
On the basis of the analysis settings or the number of differentially expressed genes, TopGO analysis IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()
BPann
# Wrapper function for topGO analysis (see helper file)
<- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
ResBPAll ##
## Building most specific GOs .....
## ( 11502 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15334 GO terms and 35660 relations. )
##
## Annotating nodes ...............
## ( 13136 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 4794 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 19: 1 nodes to be scored (0 eliminated genes)
##
## Level 18: 1 nodes to be scored (0 eliminated genes)
##
## Level 17: 6 nodes to be scored (19 eliminated genes)
##
## Level 16: 11 nodes to be scored (24 eliminated genes)
##
## Level 15: 27 nodes to be scored (93 eliminated genes)
##
## Level 14: 66 nodes to be scored (195 eliminated genes)
##
## Level 13: 117 nodes to be scored (582 eliminated genes)
##
## Level 12: 198 nodes to be scored (1755 eliminated genes)
##
## Level 11: 350 nodes to be scored (3869 eliminated genes)
##
## Level 10: 541 nodes to be scored (5708 eliminated genes)
##
## Level 9: 687 nodes to be scored (7347 eliminated genes)
##
## Level 8: 723 nodes to be scored (9345 eliminated genes)
##
## Level 7: 731 nodes to be scored (10788 eliminated genes)
##
## Level 6: 612 nodes to be scored (11732 eliminated genes)
##
## Level 5: 394 nodes to be scored (12273 eliminated genes)
##
## Level 4: 212 nodes to be scored (12701 eliminated genes)
##
## Level 3: 96 nodes to be scored (12855 eliminated genes)
##
## Level 2: 20 nodes to be scored (12925 eliminated genes)
##
## Level 1: 1 nodes to be scored (13004 eliminated genes)
write.table(ResBPAll$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResults.txt'), sep='\t', row.names=FALSE)
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenesDown), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()
BPannDown
# Wrapper function for topGO analysis (see helper file)
<- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPannDown, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
ResBPAllDown ##
## Building most specific GOs .....
## ( 11502 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15334 GO terms and 35660 relations. )
##
## Annotating nodes ...............
## ( 13136 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 3504 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 19: 1 nodes to be scored (0 eliminated genes)
##
## Level 18: 1 nodes to be scored (0 eliminated genes)
##
## Level 17: 5 nodes to be scored (19 eliminated genes)
##
## Level 16: 6 nodes to be scored (24 eliminated genes)
##
## Level 15: 17 nodes to be scored (77 eliminated genes)
##
## Level 14: 39 nodes to be scored (144 eliminated genes)
##
## Level 13: 70 nodes to be scored (426 eliminated genes)
##
## Level 12: 125 nodes to be scored (1386 eliminated genes)
##
## Level 11: 229 nodes to be scored (3459 eliminated genes)
##
## Level 10: 353 nodes to be scored (5181 eliminated genes)
##
## Level 9: 478 nodes to be scored (6816 eliminated genes)
##
## Level 8: 505 nodes to be scored (8827 eliminated genes)
##
## Level 7: 570 nodes to be scored (10360 eliminated genes)
##
## Level 6: 494 nodes to be scored (11535 eliminated genes)
##
## Level 5: 324 nodes to be scored (12192 eliminated genes)
##
## Level 4: 184 nodes to be scored (12665 eliminated genes)
##
## Level 3: 82 nodes to be scored (12841 eliminated genes)
##
## Level 2: 20 nodes to be scored (12923 eliminated genes)
##
## Level 1: 1 nodes to be scored (13003 eliminated genes)
write.table(ResBPAllDown$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsDown.txt'), sep='\t', row.names=FALSE)
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenesUp), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()
BPannUp
# Wrapper function for topGO analysis (see helper file)
<- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPannUp, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
ResBPAllUp ##
## Building most specific GOs .....
## ( 11502 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15334 GO terms and 35660 relations. )
##
## Annotating nodes ...............
## ( 13136 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 4066 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 17: 2 nodes to be scored (0 eliminated genes)
##
## Level 16: 6 nodes to be scored (0 eliminated genes)
##
## Level 15: 18 nodes to be scored (44 eliminated genes)
##
## Level 14: 51 nodes to be scored (113 eliminated genes)
##
## Level 13: 102 nodes to be scored (503 eliminated genes)
##
## Level 12: 165 nodes to be scored (1641 eliminated genes)
##
## Level 11: 280 nodes to be scored (3740 eliminated genes)
##
## Level 10: 446 nodes to be scored (5515 eliminated genes)
##
## Level 9: 562 nodes to be scored (7029 eliminated genes)
##
## Level 8: 610 nodes to be scored (8992 eliminated genes)
##
## Level 7: 621 nodes to be scored (10591 eliminated genes)
##
## Level 6: 534 nodes to be scored (11668 eliminated genes)
##
## Level 5: 365 nodes to be scored (12207 eliminated genes)
##
## Level 4: 194 nodes to be scored (12695 eliminated genes)
##
## Level 3: 89 nodes to be scored (12848 eliminated genes)
##
## Level 2: 20 nodes to be scored (12925 eliminated genes)
##
## Level 1: 1 nodes to be scored (13004 eliminated genes)
write.table(ResBPAllUp$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsUp.txt'), sep='\t', row.names=FALSE)
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenesDiscordant), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()
BPannDisc
# Wrapper function for topGO analysis (see helper file)
<- topGOResults(Genes=GeneVectors$DEGenesDiscordant, gene2GO=BPannDisc, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
ResBPDisc ##
## Building most specific GOs .....
## ( 11502 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15334 GO terms and 35660 relations. )
##
## Annotating nodes ...............
## ( 13136 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 4993 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 19: 1 nodes to be scored (0 eliminated genes)
##
## Level 18: 1 nodes to be scored (0 eliminated genes)
##
## Level 17: 6 nodes to be scored (19 eliminated genes)
##
## Level 16: 12 nodes to be scored (24 eliminated genes)
##
## Level 15: 27 nodes to be scored (93 eliminated genes)
##
## Level 14: 67 nodes to be scored (245 eliminated genes)
##
## Level 13: 127 nodes to be scored (564 eliminated genes)
##
## Level 12: 223 nodes to be scored (1730 eliminated genes)
##
## Level 11: 365 nodes to be scored (3897 eliminated genes)
##
## Level 10: 584 nodes to be scored (5868 eliminated genes)
##
## Level 9: 714 nodes to be scored (7469 eliminated genes)
##
## Level 8: 769 nodes to be scored (9379 eliminated genes)
##
## Level 7: 736 nodes to be scored (10801 eliminated genes)
##
## Level 6: 619 nodes to be scored (11749 eliminated genes)
##
## Level 5: 409 nodes to be scored (12297 eliminated genes)
##
## Level 4: 214 nodes to be scored (12711 eliminated genes)
##
## Level 3: 97 nodes to be scored (12851 eliminated genes)
##
## Level 2: 21 nodes to be scored (12925 eliminated genes)
##
## Level 1: 1 nodes to be scored (13004 eliminated genes)
write.table(ResBPDisc$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsDisc.txt'), sep='\t', row.names=FALSE)
# the if clauses avoids an error in case there is one empty category
<- topGOBarplot(TopGORes=ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL, fill=NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotSingle.pdf'), TopGOBar,
width=13, height=8)
# the if clauses avoids an error in case there is one empty category
<- topGOBarplot(TopGORes=ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotSingle.pdf'), TopGOBar,
width=13, height=8)
# the if clauses avoids an error in case there is one empty category
if(dim(ResBPAllUp$ResSel)[1] > 0 & dim(ResBPAllDown$ResSel)[1] > 0 & dim(ResBPAll$ResSel)[1] > 0 & dim(ResBPDisc$ResSel)[1] > 0){
<- topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPAllDown$ResSel, TopGOResUp=ResBPAllUp$ResSel, terms=8, pvalTh=0.01, title=NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/Barplot.pdf'), TopGOBar,
width=13, height=8)
}
# the if clauses avoids an error in case there is one empty category
<- topGOBarplot(TopGORes=ResBPDisc$ResSel, terms=8, pvalTh=0.01, title=NULL, fill = NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotDisc.pdf'), TopGOBar,
width=13, height=8)
# the if clauses avoids an error in case there is one empty category
<- topGOBarplot(TopGORes=ResBPDisc$ResSel, terms=8, pvalTh=0.01, title=NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotDisc.pdf'), TopGOBar,
width=13, height=8)
<- ifelse(params$GOSeq=='Yes', TRUE, FALSE) GOSeqEval
Gene ontology enrichment analysis is performed on the sets of genes found from the cluster analysis between fetal and organoids.
#descreasing the threshold to get an idea of the category
<- goseq.enrichment(names(GeneVectors$DEGenes), names(GeneVectors$DEGenes)[GeneVectors$DEGenes==1], "GO:BP", cutoff=0.1)
GOSeq ## Loading hg19 length data...
#colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
<- goTreemap(GOSeq, removeParents=FALSE)
gotr # to avoid errors if no results are found
} ## Loading required package: treemap
## Preparing ancestors
## Warning in if (class(try(col2rgb(bg.labels), silent = TRUE)) == "try-error")
## stop("Invalid bg.labels"): the condition has length > 1 and only the first
## element will be used
#descreasing the threshold to get an idea of the category
<- goseq.enrichment(names(GeneVectors$DEGenesDiscordant), names(GeneVectors$DEGenesDiscordant)[GeneVectors$DEGenesDiscordant==1], "GO:BP", cutoff=0.1)
GOSeq ## Loading hg19 length data...
#colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
<- goTreemap(GOSeq, removeParents=FALSE)
gotr # to avoid errors if no results are found
} ## Preparing ancestors
## Warning in if (class(try(col2rgb(bg.labels), silent = TRUE)) == "try-error")
## stop("Invalid bg.labels"): the condition has length > 1 and only the first
## element will be used
<- union(rownames(DEAs$chronic.org),rownames(DEAs$chronic.fetal))
UniverseGenes <- dc$only_org
DEG
# generation of named vectors in the format required by TopGO
<- list()
GeneVectors $DEGenes <- factor(as.integer(UniverseGenes%in%DEG))
GeneVectorsnames(GeneVectors$DEGenes) <- UniverseGenes
Therefore:
<- ifelse(params$TopGO=='Yes', TRUE, FALSE)
BpEval # the analysis is not done if the number of DEGs is lower than 2.
<- ifelse(table(GeneVectors$DEGenes)['1'] < 2 | is.na(table(GeneVectors$DEGenes)['1']), FALSE, BpEval) BpEval
On the basis of the analysis settings or the number of differentially expressed genes, TopGO analysis IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()
BPann
# Wrapper function for topGO analysis (see helper file)
<- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
ResBPAll ##
## Building most specific GOs .....
## ( 11502 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15334 GO terms and 35660 relations. )
##
## Annotating nodes ...............
## ( 13136 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 3819 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 19: 1 nodes to be scored (0 eliminated genes)
##
## Level 18: 1 nodes to be scored (0 eliminated genes)
##
## Level 17: 5 nodes to be scored (19 eliminated genes)
##
## Level 16: 12 nodes to be scored (24 eliminated genes)
##
## Level 15: 25 nodes to be scored (93 eliminated genes)
##
## Level 14: 42 nodes to be scored (245 eliminated genes)
##
## Level 13: 77 nodes to be scored (555 eliminated genes)
##
## Level 12: 145 nodes to be scored (1338 eliminated genes)
##
## Level 11: 239 nodes to be scored (3448 eliminated genes)
##
## Level 10: 408 nodes to be scored (5320 eliminated genes)
##
## Level 9: 525 nodes to be scored (7049 eliminated genes)
##
## Level 8: 579 nodes to be scored (8947 eliminated genes)
##
## Level 7: 597 nodes to be scored (10509 eliminated genes)
##
## Level 6: 514 nodes to be scored (11629 eliminated genes)
##
## Level 5: 353 nodes to be scored (12241 eliminated genes)
##
## Level 4: 189 nodes to be scored (12682 eliminated genes)
##
## Level 3: 86 nodes to be scored (12841 eliminated genes)
##
## Level 2: 20 nodes to be scored (12923 eliminated genes)
##
## Level 1: 1 nodes to be scored (13004 eliminated genes)
write.table(ResBPAll$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsOrganoids.txt'), sep='\t', row.names=FALSE)
# the if clauses avoids an error in case there is one empty category
<- topGOBarplot(TopGORes =ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL, fill = NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotDisc.pdf'), TopGOBar,
width=13, height=8)
<- topGOBarplot(TopGORes =ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotOrganoids.pdf'), TopGOBar,
width=13, height=8)
#descreasing the threshold to get an idea of the category
<- goseq.enrichment(names(GeneVectors$DEGenes), dc$org.only, "GO:BP", cutoff=0.3)
GOSeq #colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
<- goTreemap(GOSeq, removeParents=FALSE)
gotr # to avoid errors if no results are found }
<- union(rownames(DEAs$chronic.org),rownames(DEAs$chronic.fetal))
UniverseGenes <- dc$only_fetal
DEG
# generation of named vectors in the format required by TopGO
<- list()
GeneVectors $DEGenes <- factor(as.integer(UniverseGenes%in%DEG))
GeneVectorsnames(GeneVectors$DEGenes) <- UniverseGenes
Therefore:
<- ifelse(params$TopGO=='Yes', TRUE, FALSE)
BpEval # the analysis is not done if the number of DEGs is lower than 2.
<- ifelse(table(GeneVectors$DEGenes)['1'] < 2 | is.na(table(GeneVectors$DEGenes)['1']), FALSE, BpEval) BpEval
On the basis of the analysis settings or the number of differentially expressed genes, TopGO analysis IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()
BPann
# Wrapper function for topGO analysis (see helper file)
<- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
ResBPAll ##
## Building most specific GOs .....
## ( 11502 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15334 GO terms and 35660 relations. )
##
## Annotating nodes ...............
## ( 13136 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 2423 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 19: 1 nodes to be scored (0 eliminated genes)
##
## Level 18: 1 nodes to be scored (0 eliminated genes)
##
## Level 17: 4 nodes to be scored (19 eliminated genes)
##
## Level 16: 3 nodes to be scored (24 eliminated genes)
##
## Level 15: 5 nodes to be scored (51 eliminated genes)
##
## Level 14: 22 nodes to be scored (93 eliminated genes)
##
## Level 13: 51 nodes to be scored (168 eliminated genes)
##
## Level 12: 94 nodes to be scored (1064 eliminated genes)
##
## Level 11: 161 nodes to be scored (3056 eliminated genes)
##
## Level 10: 244 nodes to be scored (5108 eliminated genes)
##
## Level 9: 304 nodes to be scored (6681 eliminated genes)
##
## Level 8: 328 nodes to be scored (8498 eliminated genes)
##
## Level 7: 357 nodes to be scored (9742 eliminated genes)
##
## Level 6: 345 nodes to be scored (11241 eliminated genes)
##
## Level 5: 263 nodes to be scored (12013 eliminated genes)
##
## Level 4: 147 nodes to be scored (12603 eliminated genes)
##
## Level 3: 73 nodes to be scored (12823 eliminated genes)
##
## Level 2: 19 nodes to be scored (12923 eliminated genes)
##
## Level 1: 1 nodes to be scored (13002 eliminated genes)
## Warning in mask$eval_all_filter(dots, env_filter): NAs introduced by coercion
write.table(ResBPAll$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsHFPNSC.txt'), sep='\t', row.names=FALSE)
# the if clauses avoids an error in case there is one empty category
<- topGOBarplot(TopGORes =ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL, fill = NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotDisc.pdf'), TopGOBar,
width=13, height=8)
<- topGOBarplot(TopGORes=ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL)
TopGOBar
TopGOBar
ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotHFPNSC.pdf'), TopGOBar,
width=13, height=8)
#descreasing the threshold to get an idea of the category
<- goseq.enrichment(names(GeneVectors$DEGenes), dc$fetal.only, "GO:BP", cutoff=0.3)
GOSeq #colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
<- goTreemap(GOSeq, removeParents=FALSE)
gotr # to avoid errors if no results are found }