RNASeq Characterization of differential expression results: MixN chronic treatment in organoids

1. Environment Set Up

1.1 Parameters

for (i in 1:length(params))
  print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset  - Value: OrganoidChronic - Class: character"
## [1] "Parameter: topTagFile  - Value: Data/AllSEcorrected.RData - Class: character"
## [1] "Parameter: FDRTr  - Value: 0.05 - Class: numeric"
## [1] "Parameter: LogFCTr  - Value: 0.5 - Class: numeric"
## [1] "Parameter: OutputFolder  - Value: Data/FunctionalAnalysis/OrganoidChronicMixN/ - Class: character"
## [1] "Parameter: GOSeq  - Value: Yes - Class: character"
## [1] "Parameter: TopGO  - Value: Yes - Class: character"
## [1] "Parameter: Camera  - Value: Yes - Class: character"
## [1] "Parameter: Gsea  - Value: Yes - Class: character"
## [1] "Parameter: GseaPvalSel  - Value: 0.05 - Class: numeric"

Dataset <- params$Dataset
LogFCTh <- params$LogFCTr
FDRTh <- params$FDRTr
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 

if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=FALSE)
}

if (dir.exists(paste0(OutputFolder, 'TopGO')) == FALSE) {
  dir.create(paste0(OutputFolder, 'TopGO'), recursive=FALSE)
}

if (dir.exists(paste0(OutputFolder, 'Gsea')) == FALSE) {
  dir.create(paste0(OutputFolder, 'Gsea'), recursive=FALSE)
}

1.2 Libraries and functions

library(stringr)
library(DT)
library(ggplot2)
library(ggrepel)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
library(org.Hs.eg.db)
## 
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(RColorBrewer)
library(topGO)
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:stringr':
## 
##     boundary
## Loading required package: GO.db
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library(fgsea)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

options(stringsAsFactors=FALSE)

source('Functions/EDC_Functions.R')
source('Functions/CriFormatted.R')
source('Functions/goseq.R')

1.3 Gene sets

Molecular Signature Database: H1 (hallmark gene sets); KEGG gene sets

H1 <- gmtPathways('Data/MolecularSignatureDatabase/H/h.all.v7.0.symbols.gmt')
Kegg <- gmtPathways('Data/MolecularSignatureDatabase/C2/KEGG/c2.cp.kegg.v7.0.symbols.gmt')

2. Data Upload: differential expression results

Upload the result 0f differential expression analysis after SVA correction.
Select DEGs as having an FDR < 0.05 (anova-like FDR) and a log2FC > 0.5 either in Mix 1X or Mix 1000X. Calculate the mean fold-change in the two treatments.

load(params$topTagFile, verbose=TRUE)
## Loading objects:
##   SEs
##   DEAs

Res <- list()
Res$tableRaw <- DEAs$chronic.org
Res$table <- DEAs$chronic.org
Res$table$genes <- row.names(Res$table)
Res$table$logFC <- (Res$table$logFC.EXPO1X + Res$table$logFC.EXPO1000X)/2
Res$table$GeneName <- row.names(Res$table) # I generated this column to make the dataframe compatible with some functions
Res$degs <- dplyr::filter(Res$table, FDR < FDRTh & (abs(logFC.EXPO1X) > LogFCTh | abs(logFC.EXPO1000X) > LogFCTh) & logCPM >0)
dim(Res$degs) 
## [1] 599   9

16314 genes have been testes for differential expression.

Imposing a threshold of 1.4142136 on the FC and 0.05 on the FDR (as specified in parameters), 599 genes are selected.

3. RESULTS NAVIGATION: Interactive Table

From topTag I generate an interactive table for result interrogation with link to the Gene Cards. The table reports all the genes having a FDR < 0.05 and a Log2FC > 0.5 for at least one of the two treatments as absolute value, according to the threshold settings.

searchURL <- 'https://www.genecards.org/cgi-bin/carddisp.pl?gene='
# First part of the URL that will be used to generate the link

Res$table %>% 
  dplyr::filter(FDR < FDRTh & (abs(logFC.EXPO1X)>LogFCTh | abs(logFC.EXPO1000X)>LogFCTh) & logCPM >0) %>%
  dplyr::mutate(GeneLink=paste0('<a href="', searchURL, genes, '">', genes, '</a>')) %>% 
  # generation of the link
  dplyr::select(9, 10, 8, 1:2, 6) %>% # selection of columns to be shown
  DT::datatable(class = 'hover', rownames=FALSE, caption='Differential expression results', filter='top', 
            extensions='Buttons', options = list(pageLength=10, autoWidth=TRUE, dom='Bfrtip', 
                                                 buttons=c('csv', 'excel')), escape=FALSE) %>%
  formatRound(c(3:6), c(rep(2,3), 4))

4. TOPGO for Gene Ontology Enrichment analysis

Gene ontology enrichment analysis is performed using TopGO with Fisher statistics and weight01 algorithm. The division between up-regulated and down-regulated genes is done on the mean FC between the two treatments.

4.1 Selection of modulated genes and generation of gene vectors

I generate vectors for the gene universe, all modulated genes, up-regulated genes and down-regulated genes in the format required by TopGO.

UniverseGenes <- Res$table %>% dplyr::pull(GeneName)
DEG <- Res$degs %>% dplyr::pull(GeneName)
DEGUp <- Res$degs %>% dplyr::filter(logFC > 0) %>% dplyr::pull(GeneName) 
DEGDown <- Res$degs %>% dplyr::filter(logFC < 0) %>% dplyr::pull(GeneName) 
  
# generation of named vectors in the format required by TopGO
GeneVectors <- list()
GeneVectors$DEGenes <- factor(as.integer(UniverseGenes%in%DEG))
names(GeneVectors$DEGenes) <- UniverseGenes
GeneVectors$DEGenesDown <- factor(as.integer(UniverseGenes%in%DEGDown))
names(GeneVectors$DEGenesDown) <- UniverseGenes
GeneVectors$DEGenesUp <- factor(as.integer(UniverseGenes%in%DEGUp))
names(GeneVectors$DEGenesUp) <- UniverseGenes

Therefore:

universe genes: 16314 genes
modulated genes: 599 genes
down-regulated genes: 454 genes of interest
up-regulated genes: 145 genes of interest

BpEval <- ifelse(params$TopGO=='Yes', TRUE, FALSE)
# the analysis is not done if the number of DEGs (all, down-reg or up-reg) is lower than 2.
BpEval <- ifelse(table(GeneVectors$DEGenes)['1'] < 2 | is.na(table(GeneVectors$DEGenes)['1']) | table(GeneVectors$DEGenesDown)['1'] < 2 | is.na(table(GeneVectors$DEGenesDown)['1']) | table(GeneVectors$DEGenesUp)['1'] < 2 | is.na(table(GeneVectors$DEGenesUp)['1']), FALSE, BpEval)

On the basis of the analysis settings or the number of differentially expressed genes, TopGO analysis IS performed.

4.2 Biological Process for ALL modulated: 599 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis (see helper file)
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
## 
## Building most specific GOs .....
##  ( 11441 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15293 GO terms and 35540 relations. )
## 
## Annotating nodes ...............
##  ( 12941 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4249 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 19:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  5 nodes to be scored    (18 eliminated genes)
## 
##   Level 16:  10 nodes to be scored   (23 eliminated genes)
## 
##   Level 15:  24 nodes to be scored   (91 eliminated genes)
## 
##   Level 14:  52 nodes to be scored   (216 eliminated genes)
## 
##   Level 13:  100 nodes to be scored  (532 eliminated genes)
## 
##   Level 12:  168 nodes to be scored  (1388 eliminated genes)
## 
##   Level 11:  278 nodes to be scored  (3381 eliminated genes)
## 
##   Level 10:  463 nodes to be scored  (5055 eliminated genes)
## 
##   Level 9:   594 nodes to be scored  (6638 eliminated genes)
## 
##   Level 8:   636 nodes to be scored  (8807 eliminated genes)
## 
##   Level 7:   663 nodes to be scored  (10518 eliminated genes)
## 
##   Level 6:   555 nodes to be scored  (11486 eliminated genes)
## 
##   Level 5:   372 nodes to be scored  (12062 eliminated genes)
## 
##   Level 4:   208 nodes to be scored  (12502 eliminated genes)
## 
##   Level 3:   97 nodes to be scored   (12659 eliminated genes)
## 
##   Level 2:   21 nodes to be scored   (12732 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12811 eliminated genes)
write.table(ResBPAll$ResAll, file=paste0(OutputFolder, 'TopGO/BPAllResults.txt'), sep='\t', row.names=FALSE)

4.3 Biological Process Analysis for DOWN-REGULATED: 454 genes

# Wrapper function for topGO analysis (see helper file)
ResBPDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01, minTerms=10)
## 
## Building most specific GOs .....
##  ( 11441 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15293 GO terms and 35540 relations. )
## 
## Annotating nodes ...............
##  ( 12941 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3722 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 19:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  5 nodes to be scored    (18 eliminated genes)
## 
##   Level 16:  9 nodes to be scored    (23 eliminated genes)
## 
##   Level 15:  19 nodes to be scored   (91 eliminated genes)
## 
##   Level 14:  41 nodes to be scored   (205 eliminated genes)
## 
##   Level 13:  83 nodes to be scored   (466 eliminated genes)
## 
##   Level 12:  141 nodes to be scored  (1201 eliminated genes)
## 
##   Level 11:  233 nodes to be scored  (3164 eliminated genes)
## 
##   Level 10:  386 nodes to be scored  (4732 eliminated genes)
## 
##   Level 9:   494 nodes to be scored  (6393 eliminated genes)
## 
##   Level 8:   550 nodes to be scored  (8416 eliminated genes)
## 
##   Level 7:   592 nodes to be scored  (10213 eliminated genes)
## 
##   Level 6:   517 nodes to be scored  (11380 eliminated genes)
## 
##   Level 5:   344 nodes to be scored  (12019 eliminated genes)
## 
##   Level 4:   193 nodes to be scored  (12495 eliminated genes)
## 
##   Level 3:   91 nodes to be scored   (12655 eliminated genes)
## 
##   Level 2:   21 nodes to be scored   (12730 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12811 eliminated genes)
# Selection on enrichment of at least 2 is implemented (also to avoid depleted categories). Then categories are ranked by PVal and all the ones with Pval < th are selected. If the number is < minTerms, othter terms are included to reach the minimum number. 

# Results are shown in an interactive table
searchURL <- 'http://amigo.geneontology.org/amigo/term/'

ResBPDown$ResSel %>% 
  dplyr::mutate(GOLink=paste0('<a href="', searchURL, GO.ID, '">', GO.ID, '</a>')) %>% 
  dplyr::select(8, 2, 4, 5, 7, 6) %>% # selection of columns to be shown
  dplyr::filter(as.numeric(Statistics) < 0.01) %>% # only significant terms are shown in the table
  DT::datatable(class = 'hover', rownames = FALSE, caption='Down-regulated genes: Biological Process Gene Ontology Enrichment',  filter='top', options = list(pageLength = 5, autoWidth = TRUE), escape=FALSE)

4.4 Biological Process Analysis for UP-REGULATED: 145 genes

# Wrapper function for topGO analysis (see helper file)
ResBPUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPann, ontology='BP', description=NULL, nodeSize=10, algorithm='weight01', statistic='fisher', EnTh=2, PvalTh=0.01,  minTerms=10)
## 
## Building most specific GOs .....
##  ( 11441 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15293 GO terms and 35540 relations. )
## 
## Annotating nodes ...............
##  ( 12941 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2689 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  12 nodes to be scored   (16 eliminated genes)
## 
##   Level 14:  18 nodes to be scored   (28 eliminated genes)
## 
##   Level 13:  46 nodes to be scored   (295 eliminated genes)
## 
##   Level 12:  86 nodes to be scored   (694 eliminated genes)
## 
##   Level 11:  149 nodes to be scored  (2807 eliminated genes)
## 
##   Level 10:  260 nodes to be scored  (4552 eliminated genes)
## 
##   Level 9:   362 nodes to be scored  (5928 eliminated genes)
## 
##   Level 8:   390 nodes to be scored  (8127 eliminated genes)
## 
##   Level 7:   421 nodes to be scored  (9761 eliminated genes)
## 
##   Level 6:   386 nodes to be scored  (10983 eliminated genes)
## 
##   Level 5:   294 nodes to be scored  (11854 eliminated genes)
## 
##   Level 4:   163 nodes to be scored  (12453 eliminated genes)
## 
##   Level 3:   79 nodes to be scored   (12647 eliminated genes)
## 
##   Level 2:   19 nodes to be scored   (12723 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12810 eliminated genes)

ResBPUp$ResSel %>% 
  dplyr::mutate(GOLink=paste0('<a href="', searchURL, GO.ID, '">', GO.ID, '</a>')) %>% 
  dplyr::select(8, 2, 4, 5, 7, 6) %>% # selection of columns to be shown
  dplyr::filter(as.numeric(Statistics) < 0.01) %>% # only significant terms are shown in the table
  DT::datatable(class = 'hover', rownames = FALSE, caption='Up-regulated genes: Biological Process Gene Ontology Enrichment',  filter='top', options = list(pageLength = 5, autoWidth = TRUE), escape=FALSE)

4.5 Result visualization: Barplot

# the if clauses avoids an error in case there is one empty category
if(dim(ResBPUp$ResSel)[1] > 0 & dim(ResBPDown$ResSel)[1] > 0 & dim(ResBPAll$ResSel)[1] > 0){

TopGOBar <- topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel, terms=8, pvalTh=0.01, title=NULL)

TopGOBar

ggsave(filename=paste0(OutputFolder, '/TopGO/Barplot.pdf'), TopGOBar, 
       width=13, height=8)
  }

5. GOSeq for Gene Ontology Enrichment analysis

GOSeqEval <- ifelse(params$GOSeq=='Yes', TRUE, FALSE)

Gene ontology enrichment analysis is performed using GOSeq approach. On the basis of the analysis settings, GOSeq analysis IS performed.

GOSeq <- goseq.enrichment(names(GeneVectors$DEGenes), names(GeneVectors$DEGenes)[GeneVectors$DEGenes==1], "GO:BP", cutoff=0.05)
## Loading hg19 length data...
#colnames(GOSeq) <- c("GO.ID", "Term", "Annotated", "Significant", "enrichment", "PValue", "FDR", "genes")
if(dim(GOSeq)[1] > 0) {
gotr <- goTreemap(GOSeq, removeParents=FALSE)
} # to avoid errors if no results are found
## Loading required package: treemap
## Preparing ancestors
## Warning in if (class(try(col2rgb(bg.labels), silent = TRUE)) == "try-error")
## stop("Invalid bg.labels"): the condition has length > 1 and only the first
## element will be used

6. CAMERA for Gene Set analysis

CameraEval <- ifelse(params$Camera=='Yes', TRUE, FALSE)

On the basis of the analysis settings, Camera analysis IS performed.

printTable <- function(x){
  x$genes <- NULL
  datatable(x, filter="top", extensions = 'Buttons', 
            options = list( dom = 'Bfrtip', buttons = 'csv')
            )
}

ResCamera <- Res$table %>% dplyr::mutate(StatSign=F*sign(rowMeans(Res$tableRaw[,grep('logFC\\.',colnames(Res$tableRaw))])))
row.names(ResCamera) <- ResCamera$genes

printTable(cameraWrapper(dea=ResCamera, gsets=NULL, addDEgenes=TRUE, dea.thres=0.05, minG=5, reportMax=500))
## 
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA

7. GSEA for Gene Set Enrichment Analysis

GseaEval <- ifelse(params$Gsea=='Yes', TRUE, FALSE)

On the basis of the analysis settings, GSEA analysis IS performed.

7.1 Ranked Lists for GSEA

Rank for stat after selecting only genes with PValues < 0.05. StatSign has been calculated as for camera analysis: F value multiplied for the sign of the mean FC. N.B. fgsea sort the vector in decreasing order.

FStat <- rankGeneVector(ResCamera, Order='StatSign', PValSel=params$GseaPvalSel)
length(FStat)
## [1] 5349
head(FStat)
##     TCF3    PGBD3  ANGPTL4    MGAT1   NUCKS1   SWAP70 
## 27.12677 26.34101 22.20835 22.01903 20.87917 20.73070
tail(FStat)
##        RGS16        SCN9A        ZNF83 LOC100131564      PI4KAP1        BTAF1 
##    -28.33025    -28.67924    -29.35160    -29.55129    -31.03849    -32.85002
print(paste('Fgsea analysis is performed on', length(FStat), 'genes having PValue <', params$GseaPvalSel, 'and ranked according to signed F statistics'))
## [1] "Fgsea analysis is performed on 5349 genes having PValue < 0.05 and ranked according to signed F statistics"

7.2 GSEA analysis

fgsea analysis: H1 pathways

set.seed(42)
GseaSelH1 <- fgsea(FStat, pathways=H1, nperm=75000, maxSize=500, minSize=10, nproc=1)
## Warning in fgsea(FStat, pathways = H1, nperm = 75000, maxSize = 500,
## minSize = 10, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.07% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
data.table::fwrite(GseaSelH1, file=paste0(OutputFolder, '/Gsea/GseaSelH1.txt'), sep="\t", sep2=c("", " ", ""))

GseaResH1 <- dplyr::filter(GseaSelH1, padj < 0.1 & abs(NES) > 2) %>% 
  dplyr::mutate(LEGenes=stringr::str_count(leadingEdge, ",")+1) %>% 
  dplyr::mutate(GeneSet='H1') %>% 
  dplyr::select(1, 2, 3, 5, 9, 10)
## Warning in stri_count_regex(string, pattern, opts_regex = opts(pattern)):
## argument is not an atomic vector; coercing
GseaResH1[,1]
##                             pathway
## 1:         HALLMARK_MITOTIC_SPINDLE
## 2:          HALLMARK_G2M_CHECKPOINT
## 3:               HALLMARK_APOPTOSIS
## 4: HALLMARK_PI3K_AKT_MTOR_SIGNALING
## 5:        HALLMARK_MTORC1_SIGNALING
## 6:             HALLMARK_E2F_TARGETS
## 7:          HALLMARK_MYC_TARGETS_V1

if(dim(GseaResH1)[1] > 0){
plotGseaTable(H1[GseaResH1 %>% dplyr::arrange(NES) %>% dplyr::pull(pathway)], 
              FStat, GseaSelH1, gseaParam=0.5)
}

fgsea analysis: Kegg pathways

set.seed(42)
GseaSelKegg <- fgsea(FStat, pathways=Kegg, nperm=75000, maxSize=500, minSize=10, nproc=1)
## Warning in fgsea(FStat, pathways = Kegg, nperm = 75000, maxSize = 500,
## minSize = 10, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.07% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
data.table::fwrite(GseaSelKegg, file=paste0(OutputFolder, '/Gsea/GseaSelKegg.txt'), sep="\t", sep2=c("", " ", ""))

GseaResKegg <- dplyr::filter(GseaSelKegg, padj < 0.1 & abs(NES) > 2) %>% 
  dplyr::mutate(LEGenes=stringr::str_count(leadingEdge, ",")+1) %>% 
  dplyr::mutate(GeneSet='Kegg') %>% 
  dplyr::select(1, 2, 3, 5, 9, 10)
## Warning in stri_count_regex(string, pattern, opts_regex = opts(pattern)):
## argument is not an atomic vector; coercing
GseaResKegg[,1]
##                                  pathway
## 1:            KEGG_O_GLYCAN_BIOSYNTHESIS
## 2:          KEGG_NOTCH_SIGNALING_PATHWAY
## 3: KEGG_REGULATION_OF_ACTIN_CYTOSKELETON
## 4:                KEGG_COLORECTAL_CANCER
## 5:             KEGG_RENAL_CELL_CARCINOMA
## 6:               KEGG_ENDOMETRIAL_CANCER
## 7:     KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS
## 8:                KEGG_VIRAL_MYOCARDITIS

if(dim(GseaResKegg)[1] > 0){
plotGseaTable(Kegg[GseaResKegg %>% dplyr::arrange(NES) %>% dplyr::pull(pathway)], 
              FStat, GseaSelKegg, gseaParam=0.5)
}

7.3 GSEA results visualization

GseaRes <- rbind(GseaResH1, GseaResKegg)
Cols <- c(H1='#e76bf3', Kegg="#39b600")

BubbleGsea <- ggplot(GseaRes, aes(x=NES, y=-log10(padj), fill=GeneSet, size=LEGenes)) +
  geom_point(alpha=0.65, shape=21, color='black') + 
  #geom_text_repel(label=GseaRes$pathway, cex=1.8, 
                  #segment.color="grey50", segment.size=0.3,
                  #force=20, nudge_y=-0.15) +
  geom_vline(xintercept = 0, col='blue') +
  xlab('NES') + ylab('-log10 Adjusted PVal') +
  scale_fill_manual(values=Cols) +
  scale_size(range=c(3,15), name='Leading Edge \n Genes') +
  theme_bw()
BubbleGsea

BubbleGsea <- ggplot(GseaRes, aes(x=NES, y=-log10(padj), fill=GeneSet, size=LEGenes)) +
  geom_point(alpha=0.65, shape=21, color='black') + 
  geom_text_repel(label=GseaRes$pathway, cex=3.5, 
                  segment.color="grey50", segment.size=0.3,
                  force=20, nudge_y=-0.2,nudge_x=-0.5) +
  geom_vline(xintercept = 0, col='blue') +
  xlab('NES') + ylab('-log10 Adjusted PVal') +
  scale_fill_manual(values=Cols) +
  scale_size(range=c(3,15), name='Leading Edge \n Genes') +
  theme_bw()
BubbleGsea

ggsave(filename=paste0(OutputFolder, '/Gsea/Bubble.pdf'), BubbleGsea, 
       width=6.5, height=5.5)

Index

MixN_Acute

MixN_ChronicFetal

FunctionalFetalChronicMixN

MixN_ChronicOrganoids

FunctionalOrganoidChronicMixN

MixNVsSingleCompounds

MasterRegulatorAnalysis

Authors

Nicolò Caporale: nicolo.caporale@ieo.it, nicolo.caporale@unimi.it

Cristina Cheroni: cristina.cheroni@ieo.it

Pierre-Luc Germain: pierre-luc.germain@ieo.it

Giuseppe Testa: giuseppe.testa@ieo.it

Lab: http://www.testalab.eu/

‘Date: December 01, 2021’

sessionInfo()

## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] msigdbr_7.2.1          limma_3.46.0           treemap_2.4-2         
##  [4] goseq_1.42.0           geneLenDataBase_1.26.0 BiasedUrn_1.07        
##  [7] dplyr_1.0.5            tidyr_1.1.3            fgsea_1.16.0          
## [10] topGO_2.42.0           SparseM_1.81           GO.db_3.12.1          
## [13] graph_1.68.0           RColorBrewer_1.1-2     gridExtra_2.3         
## [16] org.Hs.eg.db_3.12.0    AnnotationDbi_1.52.0   IRanges_2.24.1        
## [19] S4Vectors_0.28.1       Biobase_2.50.0         BiocGenerics_0.36.0   
## [22] ggrepel_0.9.1          ggplot2_3.3.3          DT_0.17               
## [25] stringr_1.4.0         
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-0            ellipsis_0.3.1             
##  [3] XVector_0.30.0              GenomicRanges_1.42.0       
##  [5] farver_2.1.0                bit64_4.0.5                
##  [7] fansi_0.4.2                 xml2_1.3.2                 
##  [9] splines_4.0.3               cachem_1.0.4               
## [11] knitr_1.31                  jsonlite_1.7.2             
## [13] Rsamtools_2.6.0             gridBase_0.4-7             
## [15] dbplyr_2.1.0                shiny_1.6.0                
## [17] compiler_4.0.3              httr_1.4.2                 
## [19] assertthat_0.2.1            Matrix_1.3-2               
## [21] fastmap_1.1.0               later_1.1.0.1              
## [23] htmltools_0.5.1.1           prettyunits_1.1.1          
## [25] tools_4.0.3                 igraph_1.2.6               
## [27] gtable_0.3.0                glue_1.4.2                 
## [29] GenomeInfoDbData_1.2.4      rappdirs_0.3.3             
## [31] fastmatch_1.1-0             Rcpp_1.0.6                 
## [33] jquerylib_0.1.3             vctrs_0.3.7                
## [35] Biostrings_2.58.0           nlme_3.1-152               
## [37] rtracklayer_1.50.0          crosstalk_1.1.1            
## [39] xfun_0.21                   mime_0.10                  
## [41] lifecycle_1.0.0             XML_3.99-0.5               
## [43] zlibbioc_1.36.0             scales_1.1.1               
## [45] hms_1.0.0                   promises_1.2.0.1           
## [47] MatrixGenerics_1.2.1        SummarizedExperiment_1.20.0
## [49] yaml_2.2.1                  curl_4.3                   
## [51] memoise_2.0.0               sass_0.3.1                 
## [53] biomaRt_2.46.3              stringi_1.5.3              
## [55] RSQLite_2.2.3               highr_0.8                  
## [57] GenomicFeatures_1.42.1      BiocParallel_1.24.1        
## [59] GenomeInfoDb_1.26.2         rlang_0.4.10               
## [61] pkgconfig_2.0.3             matrixStats_0.58.0         
## [63] bitops_1.0-6                evaluate_0.14              
## [65] lattice_0.20-41             purrr_0.3.4                
## [67] labeling_0.4.2              GenomicAlignments_1.26.0   
## [69] htmlwidgets_1.5.3           bit_4.0.4                  
## [71] tidyselect_1.1.0            magrittr_2.0.1             
## [73] R6_2.5.0                    generics_0.1.0             
## [75] DelayedArray_0.16.1         DBI_1.1.1                  
## [77] pillar_1.5.1                withr_2.4.1                
## [79] mgcv_1.8-34                 RCurl_1.98-1.2             
## [81] tibble_3.1.0                crayon_1.4.1               
## [83] utf8_1.2.1                  BiocFileCache_1.14.0       
## [85] rmarkdown_2.7               progress_1.2.2             
## [87] grid_4.0.3                  data.table_1.14.0          
## [89] blob_1.2.1                  digest_0.6.27              
## [91] xtable_1.8-4                httpuv_1.5.5               
## [93] openssl_1.4.3               munsell_0.5.0              
## [95] bslib_0.2.4                 askpass_1.1