Analysis purpose

Check the expression levels and behavior through post-conceptional weeks from Brainspan (pre-natal cortex samples).

1. SetUp

Library and source upload

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Values of RMarkdown parameters

for (i in 1:length(params))
  print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset  - Value: BrainSpan - Class: character"
## [1] "Parameter: RpkmListPrel  - Value: ~/DataDir/ExternalData/BulkData/BrainSpan/OutputPreparation/PreCortexRpkmList.rds - Class: character"
## [1] "Parameter: OutputFolder  - Value: ~/DataDir/ExternalData/BulkData/BrainSpan/Output/ - Class: character"

Source helper file

source("../SignatureHelper.R") 

Folder organization

OutputFolder <- params$OutputFolder

if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, showWarnings=TRUE, recursive=TRUE)
}

2. Data Upload

I upload:

  • RpkmList: contains gene expression levels for Brainspan in LogRpkm
  • Signatures: list of genes retrieved from different literature sources

2.1 LogRpkm expression data

Load data
RpkmListPrel <- readRDS(params$RpkmListPrel)
summary(row.names(RpkmListPrel$Log2Rpkm))
##    Length     Class      Mode 
##     44507 character character
Filter out outliers PCW
Sel <- (!(RpkmListPrel$Meta$Stage %in% c(25, 26, 35))) #I eliminate the PCW that were found as outliers

RpkmList <- list()
RpkmList$Rpkm <- RpkmListPrel$Rpkm[, Sel]
RpkmList$Log2Rpkm <- RpkmListPrel$Log2Rpkm[, Sel]
RpkmList$Meta <- RpkmListPrel$Meta[Sel, ]

if(!identical(colnames(RpkmList$Log2Rpkm)[1:157], RpkmList$Meta$ID)){
  stop('Inconsistency in Expression and Metadata matrices!')
  }

### Check for duplicated gene names
table(duplicated(RpkmList$Log2Rpkm$GeneName))
## 
## FALSE 
## 44507

After removal of week 25, 26 and 35:

  • The dataset is structured in 44507 genes measured in 157 samples.
  • The following column (sample) metadata are available: ID, names, column_num, donor_name, age, gender, structure_acronym, Birth, Stage

2.2 Gene signatures

GS <- read.table(file = '~/DataDir/ExternalData/Receptors/ReceptorsComplete.txt', 
                         sep='\t', header=TRUE)
colnames(GS)[2] <- "Population"
as.data.frame(GS) %>% 
  DT::datatable(class='hover', rownames=FALSE, caption='Hormonal receptors', 
                filter='top', escape=TRUE, extension='Buttons', 
                options=list(pageLength=10, dom='Bfrtip', 
                             buttons=list(I('colvis'), c('csv', 'excel')))
                )
GSUnique <- GS[!duplicated(GS$GeneName),]
GSUnique$GeneName[!(GSUnique$GeneName %in% RpkmList$Log2Rpkm$GeneName)]
## character(0)
GSUnique <- GSUnique[(GSUnique$GeneName %in% RpkmList$Log2Rpkm$GeneName), ]

summary(GS)
##    GeneName          Population       
##  Length:39          Length:39         
##  Class :character   Class :character  
##  Mode  :character   Mode  :character

The signature is composed by 39genes classified in 11 categories.


3. Retrieve fetal signature gene expression in LogRpkm

CuratedSig lists is created:

  • LogRpkm by LogFpkmSignature function: stores expression values in logRpkm for signature genes
  • LogRpkmMean by SigLogFpkmMean function: calculate mean expression for each gene in each stage
CuratedSig <- list()
# Retrieval of expression values for gene signatures
CuratedSig$LogRpkm <- LogFpkmSignature(MetaData=RpkmList$Meta, LogFpkmData=RpkmList$Log2Rpkm[,1:157],
                                        GeneSig=unique(GSUnique$GeneName))

print(paste('Genes: ', dim(CuratedSig$LogRpkm)[2]-4))
## [1] "Genes:  44"
print(paste('Samples: ', dim(CuratedSig$LogRpkm)[1]))
## [1] "Samples:  157"
  
# Calculation of mean expression across stages
CuratedSig$LogRpkmMean <- SigLogFpkmMean(CuratedSig$LogRpkm, StartCol=10,
                                                             GroupCol='Stage', MetaInfo=GSUnique,
                                                             Arrange='Population')
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(StartCol)
## 
##   # Now:
##   data %>% select(all_of(StartCol))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'GeneName'. You can override using the
## `.groups` argument.

print(paste('MeanLog2Rpkm Genes', dim(CuratedSig$LogRpkmMean)[1]))
## [1] "MeanLog2Rpkm Genes 390"
print(paste('Min Value', round(min(CuratedSig$LogRpkmMean$ExpMean), 2)))
## [1] "Min Value 0"
print(paste('Max Value', round(max(CuratedSig$LogRpkmMean$ExpMean), 2)))
## [1] "Max Value 6.57"

4. Visualization by lollipop

ColVec <- scales::hue_pal()(12)

LogRpkmMeanRel <- CuratedSig$LogRpkmMean %>% dplyr::mutate(Stage=factor(Stage, levels=c('8', '9', '12', '13', '16', 
                                                                                            '17', '19', '21', '24', '37'))) 

CuratedSig$Lolli <- LolliPopCols(Data=LogRpkmMeanRel, PointSize=2,
                                                   SegSize=1, yLow=0, yHigh=7, CeilDown=0, 
                                                   CeilUp=7, unit='LogRpkm', cols=ColVec,
                                                   Shape=15, GeneSize=4.2, filterZero=FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.


CuratedSig$Lolli

ggsave(plot=CuratedSig$Lolli, filename=paste0(OutputFolder, 'BSpan_LogRpkm.pdf'), width=10, height=10)
ggsave(plot=CuratedSig$Lolli, filename=paste0(OutputFolder, 'BSpan_LogRpkm.png'), width=10, height=10)

5. Calculate Delta Log2Fpkm compared to a reference stage

CuratedSig$LogRpkmDelta <- DeltaLogFpkmMean(CuratedSig$LogRpkmMean, 
                                                             GroupCol='Stage', DiscardStage=NULL, 
                                                             RefStage='S8', Arrange='Population', 
                                                             MetaCols=2, Th=0, Substitute=NA)

print(paste('DeltaLogFpkm Genes', dim(CuratedSig$LogRpkmDelta)[1]))
## [1] "DeltaLogFpkm Genes 351"
print(paste('Min Value', round(min(CuratedSig$LogRpkmDelta$ExpMean, na.rm=T),2)))
## [1] "Min Value -1.37"
print(paste('Max Value', round(max(CuratedSig$LogRpkmDelta$ExpMean, na.rm=T),2)))
## [1] "Max Value 3.78"

6. Visualization by lollipop of delta expression levels compared to the earliest stage

LogRpkmDeltaRel <- CuratedSig$LogRpkmDelta %>% dplyr::mutate(Stage=factor(Stage, levels=c('D2_S9_S8', 'D3_S12_S8', 'D4_S13_S8', 'D5_S16_S8', 'D6_S17_S8', 
                                                                                            'D7_S19_S8', 'D8_S21_S8', 'D9_S24_S8',  'D10_S37_S8'))) 

CuratedSig$LolliDelta <- LolliPopCols(LogRpkmDeltaRel, PointSize=4, SegSize=1.5,
                                            cols=ColVec, yLow=-2, yHigh=4,  CeilUp=4, CeilDown=-2, 
                                            unit='DeltaLogRpkm', Shape=18, GeneSize=5, filterZero=FALSE)
CuratedSig$LolliDelta

ggsave(plot=CuratedSig$LolliDelta, filename=paste0(OutputFolder,  'BSpan_DeltaLogRpkm.pdf'), width=10, height=10)
ggsave(plot=CuratedSig$LolliDelta, filename=paste0(OutputFolder, 'BSpan_DeltaLogRpkm.png'), width=10, height=10)

7. Save information

save.image(file=paste0(OutputFolder, 'AnalysisWorkspace.RData'))
SessionInfo <- sessionInfo()
Date <- date()
Date
## [1] "Tue Jul 22 19:02:44 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_3.4.1 dplyr_1.1.0   tidyr_1.3.0  
## 
## loaded via a namespace (and not attached):
##  [1] highr_0.10        pillar_1.8.1      bslib_0.4.2       compiler_4.2.1   
##  [5] jquerylib_0.1.4   tools_4.2.1       digest_0.6.31     jsonlite_1.8.4   
##  [9] evaluate_0.20     lifecycle_1.0.3   tibble_3.2.1      gtable_0.3.1     
## [13] pkgconfig_2.0.3   rlang_1.1.1       cli_3.6.1         rstudioapi_0.14  
## [17] crosstalk_1.2.0   yaml_2.3.7        xfun_0.37         fastmap_1.1.1    
## [21] withr_2.5.0       knitr_1.42        systemfonts_1.0.4 htmlwidgets_1.6.1
## [25] generics_0.1.3    vctrs_0.6.2       sass_0.4.5        grid_4.2.1       
## [29] DT_0.27           tidyselect_1.2.0  glue_1.6.2        R6_2.5.1         
## [33] textshaping_0.3.6 fansi_1.0.4       rmarkdown_2.20    farver_2.1.1     
## [37] purrr_1.0.1       magrittr_2.0.3    ellipsis_0.3.2    scales_1.2.1     
## [41] htmltools_0.5.4   colorspace_2.1-0  ragg_1.2.3        labeling_0.4.2   
## [45] utf8_1.2.3        munsell_0.5.0     cachem_1.0.7      crayon_1.5.2