Values of RMarkdown parameters
for (i in 1:length(params))
print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset - Value: CTL08 - Class: character"
## [1] "Parameter: SE - Value: ~/DataDir/bulkRNASeq/3.DataExploration/CTL08/Output/AfterSamplesSelection/MaleLineSE_Bio.rds - Class: character"
## [1] "Parameter: OutputFolder - Value: ~/DataDir/bulkRNASeq/4.GeneSignatures/CTL08/Output/AfterSamplesSelection/MetabolismSig/ - Class: character"
## [1] "Parameter: CpmFilt - Value: 2 - Class: integer"
## [1] "Parameter: SampleFilt - Value: 4 - Class: integer"
library(RNASeqBulkExploratory) #Our Package
library(DT)
library(gridExtra)
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(SEtools)
## Loading required package: sechm
library(sechm)
library(randomcoloR)
library(ggplot2)
library(tidyHeatmap)
## ========================================
## tidyHeatmap version 1.8.1
## If you use tidyHeatmap in published research, please cite:
## 1) Mangiola et al. tidyHeatmap: an R package for modular heatmap production
## based on tidy principles. JOSS 2020.
## 2) Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
## This message can be suppressed by:
## suppressPackageStartupMessages(library(tidyHeatmap))
## ========================================
##
## Attaching package: 'tidyHeatmap'
## The following object is masked from 'package:stats':
##
## heatmap
<- params$Dataset
Dataset <- params$OutputFolder
OutputFolder
if (dir.exists(OutputFolder) == FALSE) {
dir.create(OutputFolder, recursive=FALSE)
}
SE object created from exploratory analysis. Steps already performed: gene selection according to biotype.
<- readRDS(params$SE) SE_Bio
SE object containing information about 19900 genes in 51 samples.
if(! identical(names(assays(SE_Bio)$counts), rownames(colData(SE_Bio)))){
stop('Inconsistencies in sample metadata for SE object')
}
if(! identical(names(assays(SE_Bio)$counts), colData(SE_Bio)$HRID)){
stop('Inconsistencies in sample metadata for SE object')
}
<- read.table(file = '~/DataDir/bulkRNASeq/1.GeneLength/Output/GenecodeV35GeneLength.txt',
GeneLength sep='\t', header=TRUE)
<- dplyr::left_join(as.data.frame(rowData(SE_Bio))[,1:3], GeneLength, by=c('Gene'='Gene')) SelLenght
if(identical(SelLenght$Gene, rowData(SE_Bio)$Gene)){
rowData(SE_Bio)$GeneLength <- SelLenght$Length
print('Gene Lengh information added to SE')
else {
} print('Information on gene lenght is not compatible with SE object. Gene Lenght information not added to SE')
}## [1] "Gene Lengh information added to SE"
<- read.table(file = '~/DataDir/bulkRNASeq/4.GeneSignatures/GeneSets/MetabolismGenes.txt',
GS sep='\t', header=TRUE, row.names = NULL)
names(GS)[3] <- 'Population'
%>%
GS data.frame() %>%
::datatable() DT
Already performed.
<- params$CpmFilt
CpmFilt if(is.numeric(CpmFilt)==FALSE){
stop('ERROR! Cpm threshold for gene filtering has a non-numeric value')
}
<- ifelse(params$SampleFilt=='Default', dim(SE_Bio)[2]/2, params$SampleFilt)
SampleFilt if(is.numeric(SampleFilt)==FALSE){
stop('ERROR! Sample threshold for gene filtering has a non-numeric value')
}
Genes having an expression lower than 2 in at least 4 samples are filtered out.
$lib.size
SE_Bio## S32829_H12_DMSO S32830_H14_Ret_Ag S32831_H15_Ret_Ag
## 35613673 59279739 70826142
## S32833_H27_Thyr_Inh S32837_H35_LivX_Inh S32838_H36_LivX_Inh
## 67049867 56634424 68940854
## S32839_H38_LivX_Ag S32840_H39_LivX_Ag S32841_H41_Estr_Ag
## 53964028 36461411 63518569
## S32842_H42_Estr_Ag S32845_H51_Andr_Ag S32846_H52_Andr_Inh
## 42604865 45209919 85402393
## S32847_H53_Andr_Inh S32848_H54_Andr_Inh S32849_H60_GC_Inh
## 64432453 30728121 76458966
## S32853_H68_AhHyd_Inh S32854_H69_AhHyd_Inh S32857_H22_Thyr_Ag
## 79053007 28308197 51578374
## S32858_H25_Thyr_Inh S32860_H9_DMSO S32861_H17_Ret_Inh
## 69682372 54069427 68212493
## S32862_H18_Ret_Inh S32864_H13_Ret_Ag S32865_H23_Thyr_Ag
## 35022905 27114632 36213800
## S35870_MTR3_DMSO S35872_MTR5_Thyr_Inh S35875_MTR8_LivX_Inh
## 33548410 48558405 56970827
## S35876_MTR9_LivX_Ag S35879_MTR12_Andr_Ag S35881_MTR14_GC_Inh
## 52549588 40765101 55221358
## S35882_MTR15_AhHyd_Inh MTR16_S34_CTL MTR17_S38_CTL
## 52843891 29163508 35691834
## MTR19_S41_CTL MTR20_S48_CTL MTR21_S29_CTL
## 38249382 23658993 38700856
## MTR22_S46_DMSO MTR23_S28_DMSO MTR24_S51_DMSO
## 36339476 31756335 28625488
## MTR25_S43_DMSO MTR26_S33_DMSO MTR27_S53_Thyr_Ag
## 32823794 29031507 36025706
## MTR28_S50_Thyr_Ag MTR29_S30_Andr_Ag MTR30_S42_Andr_Ag
## 36156859 38661303 37547288
## MTR31_S47_GC_Ag MTR32_S35_GC_Ag MTR33_S37_GC_Ag
## 39385341 31560063 23287199
## MTR37_S52_AhHyd_Ag MTR38_S44_AhHyd_Ag MTR39_S49_AhHyd_Ag
## 33521688 34704497 32385629
<- filterSE(SE_Bio, cpmTh=CpmFilt, sampleTh=SampleFilt)
SE_Filt ## UPDATED library.size slot
<- normTmmSE(SE_Filt, useNormFactors=TRUE, priorCount=0.25)
SE_Filt
assays(SE_Filt)$fpkm <- edgeR::rpkm(SE_Filt, normalized.lib.sizes = TRUE, gene.length=rowData(SE_Filt)$GeneLength)
assays(SE_Filt)$logfpkm <- edgeR::rpkm(SE_Filt, normalized.lib.sizes = TRUE, gene.length=rowData(SE_Filt)$GeneLength,
log=TRUE, prior.count=0.25)
After filtering out lowly-expressed genes, the dataset is structured in 14213 genes measured in 51 samples.
rownames(SE_Filt) <- rowData(SE_Filt)$GeneName
<- SE_Filt[!duplicated(rownames(SE_Filt)), ] SE_Filt
Calculate the fold-change compared to the controls (DMSO). This is done on logcpm.
<- as.vector(which(SE_Filt$Condition=='DMSO'))
DMSO <- SEtools::log2FC(SE_Filt, fromAssay='logcpm', controls=DMSO, isLog=TRUE, toAssay='log2FC') SE_Filt
colData(SE_Filt)$Condition <- factor(colData(SE_Filt)$Condition, levels=c("CTL", "DMSO", "AhHyd_Ag", "AhHyd_Inh", "Andr_Ag", "Andr_Inh", "Estr_Ag", "Estr_Inh", "GC_Ag", "GC_Inh", "LivX_Ag", "LivX_Inh", "Ret_Ag", "Ret_Inh", "Thyr_Ag", "Thyr_Inh" ))
metadata(SE_Filt)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3',
'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',
"Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050',
'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50',
'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50',
'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
)SeqRun = c("20210310" = "orange", "20210724" = "green4", "20220422" =
, "#66ACB7"))
<- GS %>% dplyr::pull(GeneName)
Genes
! Genes %in% row.names(SE_Filt)]
Genes[## [1] "CYP11A1" "CYP17A1" "CYP11B1" "CYP11B2" "CYP21A1" "HSD3B2" "AKR1C4"
## [8] "HSD11B1" "HSD17B3" "HSD3B1" "AKR1C3" "AKR1C4" "SRD5A2" "STAR"
## [15] "CYP19A1" "IDI2"
rowData(SE_Filt)$Signatures <- NA
for (i in which(rowData(SE_Filt)$GeneName %in% Genes)){
<- row.names(SE_Filt)[i]
gene rowData(SE_Filt)[i, 'Signatures'] <-GS[GS$GeneName==gene, "Population"]
}
#color for signatures
metadata(SE_Filt)$anno_colors$Signatures <- c("Steroid biogenesis" = "#e6194b", "Cholesterol synthesis" = "#4363d8")
#color for heatmaps
<- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1') ScaledCols
The following genes are not selected as expressed: CYP11A1, CYP17A1, CYP11B1, CYP11B2, CYP21A1, HSD3B2, AKR1C4, HSD11B1, HSD17B3, HSD3B1, AKR1C3, AKR1C4, SRD5A2, STAR, CYP19A1, IDI2.
<- assays(SE_Filt[row.names(SE_Filt) %in% Genes, ])$logfpkm %>% quantile(seq(0,1, by=0.1))
break_coord <- viridisLite::cividis(length(seq(head(break_coord,1), tail(break_coord,1), by=0.2)))
ExpCols
<- sechm::sechm(SE_Filt, features=Genes, assayName="logfpkm", gaps_at = "Condition", gaps_row="Signatures", top_annotation = c("Condition", "SeqRun"), show_rownames = TRUE, left_annotation = c("Signatures"), sortRowsOn=NULL, cluster_rows=FALSE, cluster_cols = FALSE, show_colnames=TRUE, hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), column_title = "Logfpkm")
p1 # breaks FALSE to avoid a symmetrical scale
p1
Saving to PDF file
<- sechm::sechm(SE_Filt, features=Genes, assayName="logfpkm", gaps_at = "Condition", gaps_row="Signatures", top_annotation = c("Condition", "SeqRun"), show_rownames = TRUE, left_annotation = c("Signatures", "UpregulatedBy", "DownregulatedBy"), sortRowsOn=NULL, cluster_rows=FALSE, cluster_cols = FALSE, show_colnames=TRUE, hmcols=ScaledCols, do.scale=TRUE, breaks=0.8, column_title = "Scaled Logfpkm", )
p2 p2
Saving to PDF file
<- sechm::sechm(SE_Filt, features=Genes, assayName="log2FC", gaps_at = "Condition", gaps_row="Signatures", top_annotation = c("Condition", "SeqRun"), show_rownames = TRUE, left_annotation = c("Signatures", "UpregulatedBy", "DownregulatedBy"), sortRowsOn=NULL, cluster_rows=FALSE, cluster_cols = FALSE, show_colnames=TRUE, hmcols=ScaledCols,
p3 do.scale=FALSE, breaks = 0.85, column_title = "LogFC versus DMSO")
p3
Saving to PDF file
<- sessionInfo()
SessionInfo <- date()
Date save.image(paste0(OutputFolder, '/', Dataset, 'SigMetab_Workspace.RData'))
Date
## [1] "Fri Jul 18 13:27:31 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tidyHeatmap_1.8.1 ggplot2_3.4.1
## [3] randomcoloR_1.1.0.1 SEtools_1.12.0
## [5] sechm_1.6.0 SummarizedExperiment_1.28.0
## [7] Biobase_2.58.0 GenomicRanges_1.50.2
## [9] GenomeInfoDb_1.34.9 IRanges_2.32.0
## [11] S4Vectors_0.36.1 BiocGenerics_0.44.0
## [13] MatrixGenerics_1.10.0 matrixStats_0.63.0
## [15] gridExtra_2.3 DT_0.27
## [17] RNASeqBulkExploratory_0.2.1
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.16 colorspace_2.1-0 rjson_0.2.21
## [4] ellipsis_0.3.2 circlize_0.4.15 XVector_0.38.0
## [7] GlobalOptions_0.1.2 clue_0.3-64 rstudioapi_0.14
## [10] bit64_4.0.5 AnnotationDbi_1.60.0 fansi_1.0.4
## [13] splines_4.2.1 codetools_0.2-19 doParallel_1.0.17
## [16] cachem_1.0.7 geneplotter_1.76.0 knitr_1.42
## [19] jsonlite_1.8.4 annotate_1.76.0 cluster_2.1.4
## [22] png_0.1-8 pheatmap_1.0.12 compiler_4.2.1
## [25] httr_1.4.5 Matrix_1.5-3 fastmap_1.1.1
## [28] limma_3.54.1 cli_3.6.1 htmltools_0.5.4
## [31] tools_4.2.1 gtable_0.3.1 glue_1.6.2
## [34] GenomeInfoDbData_1.2.9 dplyr_1.1.0 V8_4.2.2
## [37] Rcpp_1.0.10 jquerylib_0.1.4 vctrs_0.6.2
## [40] Biostrings_2.66.0 nlme_3.1-162 crosstalk_1.2.0
## [43] iterators_1.0.14 xfun_0.37 stringr_1.5.0
## [46] openxlsx_4.2.5.2 lifecycle_1.0.3 dendextend_1.16.0
## [49] XML_3.99-0.13 ca_0.71.1 edgeR_3.40.2
## [52] zlibbioc_1.44.0 scales_1.2.1 TSP_1.2-2
## [55] parallel_4.2.1 RColorBrewer_1.1-3 ComplexHeatmap_2.14.0
## [58] yaml_2.3.7 curl_5.0.0 memoise_2.0.1
## [61] sass_0.4.5 stringi_1.7.12 RSQLite_2.3.0
## [64] highr_0.10 genefilter_1.80.3 foreach_1.5.2
## [67] seriation_1.4.1 zip_2.2.2 BiocParallel_1.32.5
## [70] shape_1.4.6 rlang_1.1.1 pkgconfig_2.0.3
## [73] bitops_1.0-7 evaluate_0.20 lattice_0.20-45
## [76] purrr_1.0.1 patchwork_1.1.2 htmlwidgets_1.6.1
## [79] bit_4.0.5 tidyselect_1.2.0 magrittr_2.0.3
## [82] DESeq2_1.38.3 R6_2.5.1 generics_0.1.3
## [85] DelayedArray_0.24.0 DBI_1.1.3 withr_2.5.0
## [88] mgcv_1.8-41 pillar_1.8.1 survival_3.5-3
## [91] KEGGREST_1.38.0 RCurl_1.98-1.10 tibble_3.2.1
## [94] crayon_1.5.2 utf8_1.2.3 rmarkdown_2.20
## [97] viridis_0.6.2 GetoptLong_1.0.5 locfit_1.5-9.7
## [100] grid_4.2.1 sva_3.46.0 data.table_1.14.8
## [103] blob_1.2.3 digest_0.6.31 xtable_1.8-4
## [106] tidyr_1.3.0 munsell_0.5.0 viridisLite_0.4.1
## [109] registry_0.5-1 bslib_0.4.2