Environment Set Up

Parameters

for (i in 1:length(params))
  print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset  - Value: FetalVsOrganoidsChronic - Class: character"
## [1] "Parameter: topTagFile  - Value: Data/AllSEcorrected.RData - Class: character"
## [1] "Parameter: FDRTr  - Value: 0.05 - Class: numeric"
## [1] "Parameter: LogFCTr  - Value: 0.5 - Class: numeric"
## [1] "Parameter: OutputFolder  - Value: Data/FunctionalAnalysis/FetalVsOrganoidsMixN/ - Class: character"
## [1] "Parameter: GOSeq  - Value: Yes - Class: character"
## [1] "Parameter: TopGO  - Value: Yes - Class: character"
## [1] "Parameter: Camera  - Value: Yes - Class: character"
## [1] "Parameter: Gsea  - Value: Yes - Class: character"
## [1] "Parameter: GseaPvalSel  - Value: 0.05 - Class: numeric"
Dataset <- params$Dataset
LogFCTh <- params$LogFCTr
FDRTh <- params$FDRTr
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 

if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=FALSE)
}

if (dir.exists(paste0(OutputFolder, 'TopGO')) == FALSE) {
  dir.create(paste0(OutputFolder, 'TopGO'), recursive=FALSE)
}

if (dir.exists(paste0(OutputFolder, 'Gsea')) == FALSE) {
  dir.create(paste0(OutputFolder, 'Gsea'), recursive=FALSE)
}

Libraries and functions

library(stringr)
library(DT)
library(ggplot2)
library(ggrepel)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
library(org.Hs.eg.db)
## 
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(RColorBrewer)
library(topGO)
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:stringr':
## 
##     boundary
## Loading required package: GO.db
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library(fgsea)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
options(stringsAsFactors=FALSE)
source('Functions/EDC_Functions.R')
source('Functions/CriFormatted.R')
source('Functions/goseq.R')

Data Upload: differential expression results

upload the result if differential expression analysis after the clustering of MixN DEGs between Organoids and Fetal.

load(params$topTagFile, verbose=TRUE)
## Loading objects:
##   SEs
##   DEAs
load("Data/DEGsCluster.RData", verbose = T)
## Loading objects:
##   dc

TOPGO for genes consistent and discordant between HFPNSC and CO

Gene ontology enrichment analysis is performed on the sets of genes found from the cluster analysis between fetal and organoids, using TopGO with Fisher statistics and weight01 algorithm.

Selection of modulated genes and generation of gene vectors

I generate vectors for the gene universe, all modulated genes, up-regulated genes and down-regulated genes in the format required by TopGO.

UniverseGenes <- unique(union(rownames(DEAs$chronic.org),rownames(DEAs$chronic.fetal)))
DEG <- unique(union(dc$up.both,dc$down.both))
DEGup <- unique(dc$up.both)
DEGdown <- unique(dc$down.both)
DEGDiscordant <- unique(union(union(dc$only_org,dc$only_fetal),dc$discordant))
  
# generation of named vectors in the format required by TopGO
GeneVectors <- list()
GeneVectors$DEGenes <- factor(as.integer(UniverseGenes%in%DEG))
names(GeneVectors$DEGenes) <- UniverseGenes
GeneVectors$DEGenesDown <- factor(as.integer(UniverseGenes%in%DEGdown))
names(GeneVectors$DEGenesDown) <- UniverseGenes
GeneVectors$DEGenesUp <- factor(as.integer(UniverseGenes%in%DEGup))
names(GeneVectors$DEGenesUp) <- UniverseGenes
GeneVectors$DEGenesDiscordant <- factor(as.integer(UniverseGenes%in%DEGDiscordant))
names(GeneVectors$DEGenesDiscordant) <- UniverseGenes

Therefore:

  • universe genes: 16606 genes
  • modulated genes consistent in the 2 systems: 617 genes
  • modulated genes down in the 2 systems: 278 genes of interest
  • modulated genes up in the 2 systems: 339 genes of interest
  • modulated genes discordant in the 2 systems: 777 genes of interest
BpEval <- ifelse(params$TopGO=='Yes', TRUE, FALSE)
# the analysis is not done if the number of DEGs is lower than 2.
BpEval <- ifelse(table(GeneVectors$DEGenes)['1'] < 2 | is.na(table(GeneVectors$DEGenes)['1']) | table(GeneVectors$DEGenesDown)['1'] < 2 | is.na(table(GeneVectors$DEGenesDown)['1']) | table(GeneVectors$DEGenesUp)['1'] < 2 | is.na(table(GeneVectors$DEGenesUp)['1'])| table(GeneVectors$DEGenesDiscordant)['1'] < 2 | is.na(table(GeneVectors$DEGenesDiscordant)['1']), FALSE, BpEval)

On the basis of the analysis settings or the number of differentially expressed genes, TopGO analysis IS performed.

Biological Process for genes consistently modulated in the 2 systems: 617 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis (see helper file)
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
## 
## Building most specific GOs .....
##  ( 11502 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15334 GO terms and 35660 relations. )
## 
## Annotating nodes ...............
##  ( 13136 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4794 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 19:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  6 nodes to be scored    (19 eliminated genes)
## 
##   Level 16:  11 nodes to be scored   (24 eliminated genes)
## 
##   Level 15:  27 nodes to be scored   (93 eliminated genes)
## 
##   Level 14:  66 nodes to be scored   (195 eliminated genes)
## 
##   Level 13:  117 nodes to be scored  (582 eliminated genes)
## 
##   Level 12:  198 nodes to be scored  (1755 eliminated genes)
## 
##   Level 11:  350 nodes to be scored  (3869 eliminated genes)
## 
##   Level 10:  541 nodes to be scored  (5708 eliminated genes)
## 
##   Level 9:   687 nodes to be scored  (7347 eliminated genes)
## 
##   Level 8:   723 nodes to be scored  (9345 eliminated genes)
## 
##   Level 7:   731 nodes to be scored  (10788 eliminated genes)
## 
##   Level 6:   612 nodes to be scored  (11732 eliminated genes)
## 
##   Level 5:   394 nodes to be scored  (12273 eliminated genes)
## 
##   Level 4:   212 nodes to be scored  (12701 eliminated genes)
## 
##   Level 3:   96 nodes to be scored   (12855 eliminated genes)
## 
##   Level 2:   20 nodes to be scored   (12925 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13004 eliminated genes)
write.table(ResBPAll$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResults.txt'), sep='\t', row.names=FALSE)

Biological Process Analysis for genes down in the 2 systems: 278 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
BPannDown <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenesDown), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis (see helper file)
ResBPAllDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPannDown, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
## 
## Building most specific GOs .....
##  ( 11502 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15334 GO terms and 35660 relations. )
## 
## Annotating nodes ...............
##  ( 13136 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3504 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 19:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  5 nodes to be scored    (19 eliminated genes)
## 
##   Level 16:  6 nodes to be scored    (24 eliminated genes)
## 
##   Level 15:  17 nodes to be scored   (77 eliminated genes)
## 
##   Level 14:  39 nodes to be scored   (144 eliminated genes)
## 
##   Level 13:  70 nodes to be scored   (426 eliminated genes)
## 
##   Level 12:  125 nodes to be scored  (1386 eliminated genes)
## 
##   Level 11:  229 nodes to be scored  (3459 eliminated genes)
## 
##   Level 10:  353 nodes to be scored  (5181 eliminated genes)
## 
##   Level 9:   478 nodes to be scored  (6816 eliminated genes)
## 
##   Level 8:   505 nodes to be scored  (8827 eliminated genes)
## 
##   Level 7:   570 nodes to be scored  (10360 eliminated genes)
## 
##   Level 6:   494 nodes to be scored  (11535 eliminated genes)
## 
##   Level 5:   324 nodes to be scored  (12192 eliminated genes)
## 
##   Level 4:   184 nodes to be scored  (12665 eliminated genes)
## 
##   Level 3:   82 nodes to be scored   (12841 eliminated genes)
## 
##   Level 2:   20 nodes to be scored   (12923 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13003 eliminated genes)
write.table(ResBPAllDown$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsDown.txt'), sep='\t', row.names=FALSE)

Biological Process Analysis for genes up in the 2 systems: 339 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
BPannUp <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenesUp), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis (see helper file)
ResBPAllUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPannUp, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
## 
## Building most specific GOs .....
##  ( 11502 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15334 GO terms and 35660 relations. )
## 
## Annotating nodes ...............
##  ( 13136 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4066 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  6 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  18 nodes to be scored   (44 eliminated genes)
## 
##   Level 14:  51 nodes to be scored   (113 eliminated genes)
## 
##   Level 13:  102 nodes to be scored  (503 eliminated genes)
## 
##   Level 12:  165 nodes to be scored  (1641 eliminated genes)
## 
##   Level 11:  280 nodes to be scored  (3740 eliminated genes)
## 
##   Level 10:  446 nodes to be scored  (5515 eliminated genes)
## 
##   Level 9:   562 nodes to be scored  (7029 eliminated genes)
## 
##   Level 8:   610 nodes to be scored  (8992 eliminated genes)
## 
##   Level 7:   621 nodes to be scored  (10591 eliminated genes)
## 
##   Level 6:   534 nodes to be scored  (11668 eliminated genes)
## 
##   Level 5:   365 nodes to be scored  (12207 eliminated genes)
## 
##   Level 4:   194 nodes to be scored  (12695 eliminated genes)
## 
##   Level 3:   89 nodes to be scored   (12848 eliminated genes)
## 
##   Level 2:   20 nodes to be scored   (12925 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13004 eliminated genes)
write.table(ResBPAllUp$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsUp.txt'), sep='\t', row.names=FALSE)

Biological Process Analysis for genes discordant in the 2 systems: 777 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
BPannDisc <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenesDiscordant), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis (see helper file)
ResBPDisc <- topGOResults(Genes=GeneVectors$DEGenesDiscordant, gene2GO=BPannDisc, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
## 
## Building most specific GOs .....
##  ( 11502 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15334 GO terms and 35660 relations. )
## 
## Annotating nodes ...............
##  ( 13136 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4993 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 19:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  6 nodes to be scored    (19 eliminated genes)
## 
##   Level 16:  12 nodes to be scored   (24 eliminated genes)
## 
##   Level 15:  27 nodes to be scored   (93 eliminated genes)
## 
##   Level 14:  67 nodes to be scored   (245 eliminated genes)
## 
##   Level 13:  127 nodes to be scored  (564 eliminated genes)
## 
##   Level 12:  223 nodes to be scored  (1730 eliminated genes)
## 
##   Level 11:  365 nodes to be scored  (3897 eliminated genes)
## 
##   Level 10:  584 nodes to be scored  (5868 eliminated genes)
## 
##   Level 9:   714 nodes to be scored  (7469 eliminated genes)
## 
##   Level 8:   769 nodes to be scored  (9379 eliminated genes)
## 
##   Level 7:   736 nodes to be scored  (10801 eliminated genes)
## 
##   Level 6:   619 nodes to be scored  (11749 eliminated genes)
## 
##   Level 5:   409 nodes to be scored  (12297 eliminated genes)
## 
##   Level 4:   214 nodes to be scored  (12711 eliminated genes)
## 
##   Level 3:   97 nodes to be scored   (12851 eliminated genes)
## 
##   Level 2:   21 nodes to be scored   (12925 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13004 eliminated genes)
write.table(ResBPDisc$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsDisc.txt'), sep='\t', row.names=FALSE)

Result visualization: Barplot for consistent DEGs

# the if clauses avoids an error in case there is one empty category

TopGOBar <- topGOBarplot(TopGORes=ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL, fill=NULL)

TopGOBar


ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotSingle.pdf'), TopGOBar, 
       width=13, height=8)
# the if clauses avoids an error in case there is one empty category

TopGOBar <- topGOBarplot(TopGORes=ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL)

TopGOBar


ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotSingle.pdf'), TopGOBar, 
       width=13, height=8)
# the if clauses avoids an error in case there is one empty category
if(dim(ResBPAllUp$ResSel)[1] > 0 & dim(ResBPAllDown$ResSel)[1] > 0 & dim(ResBPAll$ResSel)[1] > 0 & dim(ResBPDisc$ResSel)[1] > 0){

TopGOBar <- topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPAllDown$ResSel, TopGOResUp=ResBPAllUp$ResSel, terms=8, pvalTh=0.01, title=NULL)

TopGOBar

ggsave(filename=paste0(OutputFolder, '/TopGO/Barplot.pdf'), TopGOBar, 
       width=13, height=8)
  }

Result visualization: Barplot for discordant DEGs

# the if clauses avoids an error in case there is one empty category

TopGOBar <- topGOBarplot(TopGORes=ResBPDisc$ResSel, terms=8, pvalTh=0.01, title=NULL, fill = NULL)

TopGOBar


ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotDisc.pdf'), TopGOBar, 
       width=13, height=8)
# the if clauses avoids an error in case there is one empty category

TopGOBar <- topGOBarplot(TopGORes=ResBPDisc$ResSel, terms=8, pvalTh=0.01, title=NULL)

TopGOBar


ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotDisc.pdf'), TopGOBar, 
       width=13, height=8)

GOSeq for Gene Ontology Enrichment analysis

GOSeqEval <- ifelse(params$GOSeq=='Yes', TRUE, FALSE)

Gene ontology enrichment analysis is performed on the sets of genes found from the cluster analysis between fetal and organoids.

GOSeq for genes consistent between the 2 systems

#descreasing the threshold to get an idea of the category
GOSeq <- goseq.enrichment(names(GeneVectors$DEGenes), names(GeneVectors$DEGenes)[GeneVectors$DEGenes==1], "GO:BP", cutoff=0.1)
## Loading hg19 length data...
#colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
gotr <- goTreemap(GOSeq, removeParents=FALSE)
} # to avoid errors if no results are found
## Loading required package: treemap
## Preparing ancestors
## Warning in if (class(try(col2rgb(bg.labels), silent = TRUE)) == "try-error")
## stop("Invalid bg.labels"): the condition has length > 1 and only the first
## element will be used

GOSeq for genes discorant between the 2 systems

#descreasing the threshold to get an idea of the category
GOSeq <- goseq.enrichment(names(GeneVectors$DEGenesDiscordant), names(GeneVectors$DEGenesDiscordant)[GeneVectors$DEGenesDiscordant==1], "GO:BP", cutoff=0.1)
## Loading hg19 length data...
#colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
gotr <- goTreemap(GOSeq, removeParents=FALSE)
} # to avoid errors if no results are found
## Preparing ancestors
## Warning in if (class(try(col2rgb(bg.labels), silent = TRUE)) == "try-error")
## stop("Invalid bg.labels"): the condition has length > 1 and only the first
## element will be used


TOPGO for genes only altered in CO

UniverseGenes <- union(rownames(DEAs$chronic.org),rownames(DEAs$chronic.fetal))
DEG <- dc$only_org

# generation of named vectors in the format required by TopGO
GeneVectors <- list()
GeneVectors$DEGenes <- factor(as.integer(UniverseGenes%in%DEG))
names(GeneVectors$DEGenes) <- UniverseGenes

Therefore:

  • universe genes: 16606 genes
  • modulated genes only in organoids: 367 genes
BpEval <- ifelse(params$TopGO=='Yes', TRUE, FALSE)
# the analysis is not done if the number of DEGs is lower than 2.
BpEval <- ifelse(table(GeneVectors$DEGenes)['1'] < 2 | is.na(table(GeneVectors$DEGenes)['1']), FALSE, BpEval)

On the basis of the analysis settings or the number of differentially expressed genes, TopGO analysis IS performed.

Biological Process for genes modulated in organoids: 367 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis (see helper file)
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
## 
## Building most specific GOs .....
##  ( 11502 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15334 GO terms and 35660 relations. )
## 
## Annotating nodes ...............
##  ( 13136 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3819 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 19:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  5 nodes to be scored    (19 eliminated genes)
## 
##   Level 16:  12 nodes to be scored   (24 eliminated genes)
## 
##   Level 15:  25 nodes to be scored   (93 eliminated genes)
## 
##   Level 14:  42 nodes to be scored   (245 eliminated genes)
## 
##   Level 13:  77 nodes to be scored   (555 eliminated genes)
## 
##   Level 12:  145 nodes to be scored  (1338 eliminated genes)
## 
##   Level 11:  239 nodes to be scored  (3448 eliminated genes)
## 
##   Level 10:  408 nodes to be scored  (5320 eliminated genes)
## 
##   Level 9:   525 nodes to be scored  (7049 eliminated genes)
## 
##   Level 8:   579 nodes to be scored  (8947 eliminated genes)
## 
##   Level 7:   597 nodes to be scored  (10509 eliminated genes)
## 
##   Level 6:   514 nodes to be scored  (11629 eliminated genes)
## 
##   Level 5:   353 nodes to be scored  (12241 eliminated genes)
## 
##   Level 4:   189 nodes to be scored  (12682 eliminated genes)
## 
##   Level 3:   86 nodes to be scored   (12841 eliminated genes)
## 
##   Level 2:   20 nodes to be scored   (12923 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13004 eliminated genes)
write.table(ResBPAll$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsOrganoids.txt'), sep='\t', row.names=FALSE)

Result visualization: Barplot

# the if clauses avoids an error in case there is one empty category

TopGOBar <- topGOBarplot(TopGORes =ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL, fill = NULL)

TopGOBar


ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotDisc.pdf'), TopGOBar, 
       width=13, height=8)
TopGOBar <- topGOBarplot(TopGORes =ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL)

TopGOBar


ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotOrganoids.pdf'), TopGOBar, 
       width=13, height=8)

GOSeq for genes dysregulated only in organoids

#descreasing the threshold to get an idea of the category
GOSeq <- goseq.enrichment(names(GeneVectors$DEGenes), dc$org.only, "GO:BP", cutoff=0.3)
#colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
gotr <- goTreemap(GOSeq, removeParents=FALSE)
} # to avoid errors if no results are found

TOPGO for genes only altered in HFPNSC

UniverseGenes <- union(rownames(DEAs$chronic.org),rownames(DEAs$chronic.fetal))
DEG <- dc$only_fetal

# generation of named vectors in the format required by TopGO
GeneVectors <- list()
GeneVectors$DEGenes <- factor(as.integer(UniverseGenes%in%DEG))
names(GeneVectors$DEGenes) <- UniverseGenes

Therefore:

  • universe genes: 16606 genes
  • modulated genes only in HFPNSC: 217 genes
BpEval <- ifelse(params$TopGO=='Yes', TRUE, FALSE)
# the analysis is not done if the number of DEGs is lower than 2.
BpEval <- ifelse(table(GeneVectors$DEGenes)['1'] < 2 | is.na(table(GeneVectors$DEGenes)['1']), FALSE, BpEval)

On the basis of the analysis settings or the number of differentially expressed genes, TopGO analysis IS performed.

Biological Process for genes modulated in HFPNSC: 217 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis (see helper file)
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
## 
## Building most specific GOs .....
##  ( 11502 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15334 GO terms and 35660 relations. )
## 
## Annotating nodes ...............
##  ( 13136 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2423 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 19:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  4 nodes to be scored    (19 eliminated genes)
## 
##   Level 16:  3 nodes to be scored    (24 eliminated genes)
## 
##   Level 15:  5 nodes to be scored    (51 eliminated genes)
## 
##   Level 14:  22 nodes to be scored   (93 eliminated genes)
## 
##   Level 13:  51 nodes to be scored   (168 eliminated genes)
## 
##   Level 12:  94 nodes to be scored   (1064 eliminated genes)
## 
##   Level 11:  161 nodes to be scored  (3056 eliminated genes)
## 
##   Level 10:  244 nodes to be scored  (5108 eliminated genes)
## 
##   Level 9:   304 nodes to be scored  (6681 eliminated genes)
## 
##   Level 8:   328 nodes to be scored  (8498 eliminated genes)
## 
##   Level 7:   357 nodes to be scored  (9742 eliminated genes)
## 
##   Level 6:   345 nodes to be scored  (11241 eliminated genes)
## 
##   Level 5:   263 nodes to be scored  (12013 eliminated genes)
## 
##   Level 4:   147 nodes to be scored  (12603 eliminated genes)
## 
##   Level 3:   73 nodes to be scored   (12823 eliminated genes)
## 
##   Level 2:   19 nodes to be scored   (12923 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13002 eliminated genes)
## Warning in mask$eval_all_filter(dots, env_filter): NAs introduced by coercion
write.table(ResBPAll$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResultsHFPNSC.txt'), sep='\t', row.names=FALSE)

Result visualization: Barplot

# the if clauses avoids an error in case there is one empty category

TopGOBar <- topGOBarplot(TopGORes =ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL, fill = NULL)

TopGOBar


ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotDisc.pdf'), TopGOBar, 
       width=13, height=8)
TopGOBar <- topGOBarplot(TopGORes=ResBPAll$ResSel, terms=8, pvalTh=0.01, title=NULL)

TopGOBar


ggsave(filename=paste0(OutputFolder, '/TopGO/BarplotHFPNSC.pdf'), TopGOBar, 
       width=13, height=8)

GOSeq for genes dysregulated only in fetal

#descreasing the threshold to get an idea of the category
GOSeq <- goseq.enrichment(names(GeneVectors$DEGenes), dc$fetal.only, "GO:BP", cutoff=0.3)
#colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
gotr <- goTreemap(GOSeq, removeParents=FALSE)
} # to avoid errors if no results are found

Authors

Nicolò Caporale: ,

Cristina Cheroni:

Pierre-Luc Germain:

Giuseppe Testa:

Lab: http://www.testalab.eu/

‘Date: December 01, 2021’

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] treemap_2.4-2          goseq_1.42.0           geneLenDataBase_1.26.0
##  [4] BiasedUrn_1.07         dplyr_1.0.5            tidyr_1.1.3           
##  [7] fgsea_1.16.0           topGO_2.42.0           SparseM_1.81          
## [10] GO.db_3.12.1           graph_1.68.0           RColorBrewer_1.1-2    
## [13] gridExtra_2.3          org.Hs.eg.db_3.12.0    AnnotationDbi_1.52.0  
## [16] IRanges_2.24.1         S4Vectors_0.28.1       Biobase_2.50.0        
## [19] BiocGenerics_0.36.0    ggrepel_0.9.1          ggplot2_3.3.3         
## [22] DT_0.17                stringr_1.4.0         
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-0            ellipsis_0.3.1             
##  [3] XVector_0.30.0              GenomicRanges_1.42.0       
##  [5] farver_2.1.0                bit64_4.0.5                
##  [7] fansi_0.4.2                 xml2_1.3.2                 
##  [9] splines_4.0.3               cachem_1.0.4               
## [11] knitr_1.31                  jsonlite_1.7.2             
## [13] Rsamtools_2.6.0             gridBase_0.4-7             
## [15] dbplyr_2.1.0                shiny_1.6.0                
## [17] compiler_4.0.3              httr_1.4.2                 
## [19] assertthat_0.2.1            Matrix_1.3-2               
## [21] fastmap_1.1.0               later_1.1.0.1              
## [23] htmltools_0.5.1.1           prettyunits_1.1.1          
## [25] tools_4.0.3                 igraph_1.2.6               
## [27] gtable_0.3.0                glue_1.4.2                 
## [29] GenomeInfoDbData_1.2.4      rappdirs_0.3.3             
## [31] fastmatch_1.1-0             Rcpp_1.0.6                 
## [33] jquerylib_0.1.3             vctrs_0.3.7                
## [35] Biostrings_2.58.0           nlme_3.1-152               
## [37] rtracklayer_1.50.0          xfun_0.21                  
## [39] mime_0.10                   lifecycle_1.0.0            
## [41] XML_3.99-0.5                zlibbioc_1.36.0            
## [43] scales_1.1.1                hms_1.0.0                  
## [45] promises_1.2.0.1            MatrixGenerics_1.2.1       
## [47] SummarizedExperiment_1.20.0 yaml_2.2.1                 
## [49] curl_4.3                    memoise_2.0.0              
## [51] sass_0.3.1                  biomaRt_2.46.3             
## [53] stringi_1.5.3               RSQLite_2.2.3              
## [55] highr_0.8                   GenomicFeatures_1.42.1     
## [57] BiocParallel_1.24.1         GenomeInfoDb_1.26.2        
## [59] rlang_0.4.10                pkgconfig_2.0.3            
## [61] matrixStats_0.58.0          bitops_1.0-6               
## [63] evaluate_0.14               lattice_0.20-41            
## [65] purrr_0.3.4                 GenomicAlignments_1.26.0   
## [67] htmlwidgets_1.5.3           bit_4.0.4                  
## [69] tidyselect_1.1.0            magrittr_2.0.1             
## [71] R6_2.5.0                    generics_0.1.0             
## [73] DelayedArray_0.16.1         DBI_1.1.1                  
## [75] pillar_1.5.1                withr_2.4.1                
## [77] mgcv_1.8-34                 RCurl_1.98-1.2             
## [79] tibble_3.1.0                crayon_1.4.1               
## [81] utf8_1.2.1                  BiocFileCache_1.14.0       
## [83] rmarkdown_2.7               progress_1.2.2             
## [85] grid_4.0.3                  data.table_1.14.0          
## [87] blob_1.2.1                  digest_0.6.27              
## [89] xtable_1.8-4                httpuv_1.5.5               
## [91] openssl_1.4.3               munsell_0.5.0              
## [93] bslib_0.2.4                 askpass_1.1