TopGO - CTL08

Condition_1 <- params$Condition_1
Condition_2 <- params$Condition_2

Top GO Analysis for Estr Agonist

1. Environment Set Up

library(RNASeqBulkExploratory)
library(DT)
library(ggplot2)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
library(org.Hs.eg.db)
## 
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(RColorBrewer)
library(viridis)
## Loading required package: viridisLite
library(topGO)
## Loading required package: graph
## Loading required package: GO.db
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
library(sechm)

source("../../plotGenesInTerm_v2.R")

Dataset <- params$Dataset
logFcTh <- params$logFcTh
FdrTh <- params$FdrTh
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 


if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=TRUE)
}

2. Data Upload

Summarized Experiment object containing expression data used for DEA and gene and sample metadata
DEA object, containing results of the differential expression

2.1 Load Data from DEA

# List with differential expression results 
DEA <- readRDS(params$DEAFile)

#SE object coming from DEA, but not containing specific contrast results
SE_DEA <- readRDS(params$SEFile)

2.2 Add DEA results to SE

if(! identical(rownames(SE_DEA), row.names(DEA[[Condition_1]][[Condition_2]]$Res))){
  stop('Expression data in SE and results from differential espression analysis are inconsistent.')
}
## Loading required package: DESeq2


rowData(SE_DEA) <- cbind(rowData(SE_DEA)[,1:6], DEA[[Condition_1]][[Condition_2]]$Res)
  
# Column names must be set to be compliant with the required format to be recognized by ORA
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='log2FoldChange')] <- 'logFC'
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='padj')] <- 'FDR'

#metadata(SE_DEA_Prel)$annotation <- 'hsa'

14850 genes in 20 samples have been testes for differential expression.

Imposing a threshold of 1 on the Log2FC and 0.01 on the FDR (as specified in parameters), 1702 genes are selected: 905 up-regulated genes and 797 down-regulated genes.

3. RESULTS NAVIGATION: Interactive Table

An interactive table show the results for the top 500 DEGs (ranked according to FDR).

DEGsTable(SE_DEA, FdrTh=0.01, logFcTh=1, maxGenes=500, saveDEGs=TRUE, outDir=OutputFolder)

## Top 500 genes will be included in the table

4. RESULTS VISUALIZATION

4.1 Volcano plot

plotVolcanoSE(SE=SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh, FdrCeil=1e-10, logFcCeil=4)

4.2 Heatmap for significant genes

Heatmaps for DEGs, showing scaled vst values.

DEGs <- dplyr::filter(data.frame(rowData(SE_DEA)), FDR < FdrTh & abs(logFC) > logFcTh)   


ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')

colData(SE_DEA)$Condition <- factor(colData(SE_DEA)$Condition, levels=c("CTL", "DMSO", "AhHyd_Ag", "AhHyd_Inh", "Andr_Ag", "Andr_Inh", "Estr_Ag", "Estr_Inh", "GC_Ag", "GC_Inh", "LivX_Ag", "LivX_Inh", "Ret_Ag", "Ret_Inh", "Thyr_Ag", "Thyr_Inh" ))

metadata(SE_DEA)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3', 
                 'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
                 'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',  
                 "Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050', 
                 'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
                 'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50', 
                 'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50', 
                 'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
                 ), SeqRun = c("20210310" = "orange", "20210724"  = "green4", "20220422" = 
                                          "#66ACB7"))

sechm(SE_DEA, features=DEGs$GeneName, assayName="vst", gaps_at="Condition", show_rownames=FALSE,
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols, show_colnames=TRUE,
      do.scale=TRUE, breaks=0.85, column_title = "Scaled Vst Values")

5. TOPGO for Gene Ontology Enrichment analysis

Gene ontology enrichment analysis is performed on the set of 1702 genes using TopGO with Fisher statistics and weight01 algorithm.

For each specified domain of the ontology:

Enrichment analysis on all DEGs or splitted in down- and up-regulated

5.1 Selection of modulated genes and generation of gene vectors

GeneVectors <- topGOGeneVectors(SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh)

## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1

Therefore:

universe genes: 14850 genes
modulated genes: 1702 genes
down-regulated genes: 797 genes of interest
up-regulated genes: 905 genes of interest

BpEval <- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
CcEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE)

5.2 TopGO analysis: Biological Process

On the basis of the analysis settings, the enrichment for Biological Process IS performed.

Biological Process Analysis for ALL modulated genes: 1702 genes

BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis 
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11591 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15045 GO terms and 33988 relations. )
## 
## Annotating nodes ...............
##  ( 13128 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4842 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  13 nodes to be scored   (18 eliminated genes)
## 
##   Level 15:  24 nodes to be scored   (85 eliminated genes)
## 
##   Level 14:  53 nodes to be scored   (241 eliminated genes)
## 
##   Level 13:  95 nodes to be scored   (521 eliminated genes)
## 
##   Level 12:  164 nodes to be scored  (1472 eliminated genes)
## 
##   Level 11:  344 nodes to be scored  (3453 eliminated genes)
## 
##   Level 10:  537 nodes to be scored  (5339 eliminated genes)
## 
##   Level 9:   685 nodes to be scored  (6978 eliminated genes)
## 
##   Level 8:   735 nodes to be scored  (8855 eliminated genes)
## 
##   Level 7:   778 nodes to be scored  (10520 eliminated genes)
## 
##   Level 6:   660 nodes to be scored  (11588 eliminated genes)
## 
##   Level 5:   411 nodes to be scored  (12291 eliminated genes)
## 
##   Level 4:   225 nodes to be scored  (12706 eliminated genes)
## 
##   Level 3:   93 nodes to be scored   (12882 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (12959 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13003 eliminated genes)

Biological Process Analysis for DOWN-REGULATED genes: 797 genes

# Wrapper function for topGO analysis 
ResBPDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPann, ontology='BP', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11591 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15045 GO terms and 33988 relations. )
## 
## Annotating nodes ...............
##  ( 13128 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4364 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  10 nodes to be scored   (0 eliminated genes)
## 
##   Level 15:  23 nodes to be scored   (85 eliminated genes)
## 
##   Level 14:  45 nodes to be scored   (203 eliminated genes)
## 
##   Level 13:  84 nodes to be scored   (509 eliminated genes)
## 
##   Level 12:  147 nodes to be scored  (1321 eliminated genes)
## 
##   Level 11:  299 nodes to be scored  (3244 eliminated genes)
## 
##   Level 10:  479 nodes to be scored  (5150 eliminated genes)
## 
##   Level 9:   615 nodes to be scored  (6690 eliminated genes)
## 
##   Level 8:   660 nodes to be scored  (8619 eliminated genes)
## 
##   Level 7:   695 nodes to be scored  (10269 eliminated genes)
## 
##   Level 6:   595 nodes to be scored  (11476 eliminated genes)
## 
##   Level 5:   385 nodes to be scored  (12186 eliminated genes)
## 
##   Level 4:   213 nodes to be scored  (12680 eliminated genes)
## 
##   Level 3:   90 nodes to be scored   (12874 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (12957 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13003 eliminated genes)

GOTable(ResBPDown$ResSel, maxGO=20)

Biological Process Analysis for UP-REGULATED genes: 905 genes

ResBPUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPann, ontology='BP', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='BPUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11591 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15045 GO terms and 33988 relations. )
## 
## Annotating nodes ...............
##  ( 13128 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4126 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  10 nodes to be scored   (18 eliminated genes)
## 
##   Level 15:  20 nodes to be scored   (73 eliminated genes)
## 
##   Level 14:  41 nodes to be scored   (203 eliminated genes)
## 
##   Level 13:  72 nodes to be scored   (453 eliminated genes)
## 
##   Level 12:  119 nodes to be scored  (1316 eliminated genes)
## 
##   Level 11:  240 nodes to be scored  (3272 eliminated genes)
## 
##   Level 10:  408 nodes to be scored  (5102 eliminated genes)
## 
##   Level 9:   570 nodes to be scored  (6635 eliminated genes)
## 
##   Level 8:   645 nodes to be scored  (8532 eliminated genes)
## 
##   Level 7:   696 nodes to be scored  (10347 eliminated genes)
## 
##   Level 6:   599 nodes to be scored  (11494 eliminated genes)
## 
##   Level 5:   384 nodes to be scored  (12273 eliminated genes)
## 
##   Level 4:   211 nodes to be scored  (12702 eliminated genes)
## 
##   Level 3:   88 nodes to be scored   (12878 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (12956 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13003 eliminated genes)

GOTable(ResBPUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResBPDown$ResSel, ResBPDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResBPUp$ResSel, ResBPUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.3 TopGO analysis: Molecular Function

On the basis of the analysis settings, the enrichment for Molecular Function IS performed.

Molecular Function Enrichment for ALL modulated genes: 1702 genes

MFann <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResMFAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=MFann, ontology='MF', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='MFAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4114 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4579 GO terms and 5967 relations. )
## 
## Annotating nodes ...............
##  ( 13476 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 818 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  13 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   27 nodes to be scored   (177 eliminated genes)
## 
##   Level 8:   61 nodes to be scored   (1256 eliminated genes)
## 
##   Level 7:   111 nodes to be scored  (3232 eliminated genes)
## 
##   Level 6:   159 nodes to be scored  (3964 eliminated genes)
## 
##   Level 5:   191 nodes to be scored  (5585 eliminated genes)
## 
##   Level 4:   174 nodes to be scored  (8545 eliminated genes)
## 
##   Level 3:   56 nodes to be scored   (10853 eliminated genes)
## 
##   Level 2:   17 nodes to be scored   (11643 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13356 eliminated genes)

Molecular Function Enrichment for DOWN-REGULATED genes: 797 genes

ResMFDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=MFann, ontology='MF', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='MFDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4114 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4579 GO terms and 5967 relations. )
## 
## Annotating nodes ...............
##  ( 13476 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 699 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  7 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  9 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   23 nodes to be scored   (170 eliminated genes)
## 
##   Level 8:   50 nodes to be scored   (1183 eliminated genes)
## 
##   Level 7:   95 nodes to be scored   (3191 eliminated genes)
## 
##   Level 6:   133 nodes to be scored  (3847 eliminated genes)
## 
##   Level 5:   162 nodes to be scored  (5359 eliminated genes)
## 
##   Level 4:   150 nodes to be scored  (8322 eliminated genes)
## 
##   Level 3:   53 nodes to be scored   (10690 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (11567 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13343 eliminated genes)

GOTable(ResMFDown$ResSel, maxGO=20)

Molecular Function Analysis for UP-REGULATED genes: 905 genes

ResMFUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=MFann, ontology='MF', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='MFUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4114 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4579 GO terms and 5967 relations. )
## 
## Annotating nodes ...............
##  ( 13476 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 705 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  7 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  10 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   22 nodes to be scored   (162 eliminated genes)
## 
##   Level 8:   47 nodes to be scored   (1205 eliminated genes)
## 
##   Level 7:   92 nodes to be scored   (3122 eliminated genes)
## 
##   Level 6:   130 nodes to be scored  (3788 eliminated genes)
## 
##   Level 5:   171 nodes to be scored  (5316 eliminated genes)
## 
##   Level 4:   160 nodes to be scored  (8339 eliminated genes)
## 
##   Level 3:   49 nodes to be scored   (10795 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (11625 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13354 eliminated genes)

GOTable(ResMFUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResMFDown$ResSel, ResMFDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResMFUp$ResSel, ResMFUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.4 TopGO analysis: Cellular Component

On the basis of the analysis settings, the enrichment for Cellular Component IS performed.

Cellular Component Enrichment for ALL modulated genes: 1702 genes

CCann <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResCCAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=CCann, ontology='CC', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='CCAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1740 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1936 GO terms and 3272 relations. )
## 
## Annotating nodes ...............
##  ( 13716 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 603 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  27 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  62 nodes to be scored   (83 eliminated genes)
## 
##   Level 9:   90 nodes to be scored   (887 eliminated genes)
## 
##   Level 8:   89 nodes to be scored   (2808 eliminated genes)
## 
##   Level 7:   86 nodes to be scored   (5115 eliminated genes)
## 
##   Level 6:   85 nodes to be scored   (8498 eliminated genes)
## 
##   Level 5:   72 nodes to be scored   (10128 eliminated genes)
## 
##   Level 4:   48 nodes to be scored   (12043 eliminated genes)
## 
##   Level 3:   37 nodes to be scored   (13175 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13557 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13659 eliminated genes)

Cellular Component Enrichment for DOWN-REGULATED genes: 797 genes

# Wrapper function for topGO analysis 
ResCCDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=CCann, ontology='CC', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='CCDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1740 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1936 GO terms and 3272 relations. )
## 
## Annotating nodes ...............
##  ( 13716 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 499 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  15 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  47 nodes to be scored   (66 eliminated genes)
## 
##   Level 9:   70 nodes to be scored   (615 eliminated genes)
## 
##   Level 8:   71 nodes to be scored   (2572 eliminated genes)
## 
##   Level 7:   76 nodes to be scored   (4916 eliminated genes)
## 
##   Level 6:   75 nodes to be scored   (8409 eliminated genes)
## 
##   Level 5:   62 nodes to be scored   (10093 eliminated genes)
## 
##   Level 4:   42 nodes to be scored   (12025 eliminated genes)
## 
##   Level 3:   35 nodes to be scored   (13172 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13557 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13659 eliminated genes)

GOTable(ResCCDown$ResSel, maxGO=20)

Cellular Component Analysis for UP-REGULATED genes: 905 genes

# Wrapper function for topGO analysis 
ResCCUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=CCann, ontology='CC', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='CCUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1740 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1936 GO terms and 3272 relations. )
## 
## Annotating nodes ...............
##  ( 13716 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 548 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  23 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  55 nodes to be scored   (83 eliminated genes)
## 
##   Level 9:   79 nodes to be scored   (773 eliminated genes)
## 
##   Level 8:   81 nodes to be scored   (2684 eliminated genes)
## 
##   Level 7:   76 nodes to be scored   (4939 eliminated genes)
## 
##   Level 6:   83 nodes to be scored   (8455 eliminated genes)
## 
##   Level 5:   66 nodes to be scored   (10099 eliminated genes)
## 
##   Level 4:   42 nodes to be scored   (12034 eliminated genes)
## 
##   Level 3:   36 nodes to be scored   (13171 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13557 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13659 eliminated genes)

GOTable(ResCCUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResCCDown$ResSel, ResCCDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResCCUp$ResSel, ResCCUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

6. Savings

SessionInfo <- sessionInfo()
Date <- date()
#
save.image(paste0(OutputFolder, Dataset, 'FunctionalAnalysisWorkspace.RData'))

SessionInfo

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] DESeq2_1.38.3               sechm_1.6.0                
##  [3] SummarizedExperiment_1.28.0 GenomicRanges_1.50.2       
##  [5] GenomeInfoDb_1.34.9         MatrixGenerics_1.10.0      
##  [7] matrixStats_0.63.0          dplyr_1.1.0                
##  [9] tidyr_1.3.0                 data.table_1.14.8          
## [11] topGO_2.50.0                SparseM_1.81               
## [13] GO.db_3.16.0                graph_1.76.0               
## [15] viridis_0.6.2               viridisLite_0.4.1          
## [17] RColorBrewer_1.1-3          gridExtra_2.3              
## [19] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.0       
## [21] IRanges_2.32.0              S4Vectors_0.36.1           
## [23] Biobase_2.58.0              BiocGenerics_0.44.0        
## [25] ggplot2_3.4.1               DT_0.27                    
## [27] RNASeqBulkExploratory_0.2.1
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.16             colorspace_2.1-0       rjson_0.2.21          
##  [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.38.0        
##  [7] GlobalOptions_0.1.2    clue_0.3-64            rstudioapi_0.14       
## [10] farver_2.1.1           bit64_4.0.5            fansi_1.0.4           
## [13] codetools_0.2-19       doParallel_1.0.17      cachem_1.0.7          
## [16] geneplotter_1.76.0     knitr_1.42             jsonlite_1.8.4        
## [19] annotate_1.76.0        cluster_2.1.4          png_0.1-8             
## [22] compiler_4.2.1         httr_1.4.5             lazyeval_0.2.2        
## [25] Matrix_1.5-3           fastmap_1.1.1          cli_3.6.1             
## [28] htmltools_0.5.4        tools_4.2.1            gtable_0.3.1          
## [31] glue_1.6.2             GenomeInfoDbData_1.2.9 V8_4.2.2              
## [34] Rcpp_1.0.10            jquerylib_0.1.4        vctrs_0.6.2           
## [37] Biostrings_2.66.0      iterators_1.0.14       crosstalk_1.2.0       
## [40] xfun_0.37              stringr_1.5.0          lifecycle_1.0.3       
## [43] XML_3.99-0.13          ca_0.71.1              zlibbioc_1.44.0       
## [46] scales_1.2.1           TSP_1.2-2              parallel_4.2.1        
## [49] ComplexHeatmap_2.14.0  yaml_2.3.7             curl_5.0.0            
## [52] memoise_2.0.1          sass_0.4.5             stringi_1.7.12        
## [55] RSQLite_2.3.0          highr_0.10             randomcoloR_1.1.0.1   
## [58] foreach_1.5.2          seriation_1.4.1        BiocParallel_1.32.5   
## [61] shape_1.4.6            rlang_1.1.1            pkgconfig_2.0.3       
## [64] bitops_1.0-7           evaluate_0.20          lattice_0.20-45       
## [67] purrr_1.0.1            labeling_0.4.2         htmlwidgets_1.6.1     
## [70] bit_4.0.5              tidyselect_1.2.0       magrittr_2.0.3        
## [73] R6_2.5.1               generics_0.1.3         DelayedArray_0.24.0   
## [76] DBI_1.1.3              pillar_1.8.1           withr_2.5.0           
## [79] KEGGREST_1.38.0        RCurl_1.98-1.10        tibble_3.2.1          
## [82] crayon_1.5.2           utf8_1.2.3             plotly_4.10.1         
## [85] rmarkdown_2.20         GetoptLong_1.0.5       locfit_1.5-9.7        
## [88] grid_4.2.1             blob_1.2.3             digest_0.6.31         
## [91] xtable_1.8-4           munsell_0.5.0          registry_0.5-1        
## [94] bslib_0.4.2

Date

## [1] "Fri Jul 18 18:07:12 2025"

TopGO - CTL08

Date: July 18, 2025

1. Environment Set Up

2. Data Upload

2.1 Load Data from DEA

2.2 Add DEA results to SE

3. RESULTS NAVIGATION: Interactive Table

4. RESULTS VISUALIZATION

4.1 Volcano plot

4.2 Heatmap for significant genes

5. TOPGO for Gene Ontology Enrichment analysis

5.1 Selection of modulated genes and generation of gene vectors

5.2 TopGO analysis: Biological Process

Biological Process Analysis for ALL modulated genes: 1702 genes

Biological Process Analysis for DOWN-REGULATED genes: 797 genes

Biological Process Analysis for UP-REGULATED genes: 905 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.3 TopGO analysis: Molecular Function

Molecular Function Enrichment for ALL modulated genes: 1702 genes

Molecular Function Enrichment for DOWN-REGULATED genes: 797 genes

Molecular Function Analysis for UP-REGULATED genes: 905 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.4 TopGO analysis: Cellular Component

Cellular Component Enrichment for ALL modulated genes: 1702 genes

Cellular Component Enrichment for DOWN-REGULATED genes: 797 genes

Cellular Component Analysis for UP-REGULATED genes: 905 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

6. Savings