suppressPackageStartupMessages({
library(ComplexHeatmap)
library(DT)
library(ggplot2)
library(cowplot)
library(ggrepel)
library(viper)
library(limma)
library(SummarizedExperiment)
library(SEtools)
library(igraph)
library(overlapper)
})theme_set(theme_cowplot())
source("Functions/EDC_Functions.R")
source("Functions/CriFormatted.R")
source("Functions/clustering.R")
source("Functions/clusterEnrichment.R")
load("Data/AllSEcorrected.RData")
<- lapply(SEs[1:3], FUN=function(x){
SEs <- x[,x$EXPO!="VITC"]
x assays(x)$logFC <- assays(x)$corrected-rowMeans(assays(x)$corrected[,x$EXPO=="CNT"])
x
})<- DEAs[1:3]
DEAs # we get DEGs significant in at least one system
<- getDEGsFromRes(DEAs)
degs # we get 'cross-system degs'
<- sapply(DEAs, FUN=function(x) x[unique(unlist(degs)),"FDR"])
tmp <- apply(tmp,1,weights=c(1,1,1),FUN=aggregation::lancaster)
q names(q) <- unique(unlist(degs))
<- names(q)[q<0.05] degs2
<- mergeSEs(SEs[1:2], use.assays="logFC", do.scale = FALSE)
se <- intersect(degs2,row.names(se))
degs2 <- se[,se$EXPO %in% c("CNT","1X","1000X")]
se
# binarize
<- sapply(split(seq_len(ncol(se)), paste(se$Dataset, se$EXPO)), FUN=function(i, lfct=0.1){
b <- assays(se)$logFC[degs2,i,drop=FALSE]
x <- sign(rowMedians(x))*(abs(rowMedians(x))>lfct)
y <- apply(sign(x),1,FUN=function(x){
w max(as.numeric(table(x)))<(0.75*length(x))
})<- y[w]/2
y[w]
y
})row.names(b) <- degs2
set.seed(1234)
<- clusterWrapper(b[,grep("CNT",colnames(b),invert=TRUE)], 4:10, nstart=3) cl
rowData(se)$cluster <- factor(cl[["8"]]$cluster[row.names(se)])
<- split(names(cl[["8"]]$cluster), cl[["8"]]$cluster)
cg
#cg2<-list()
#cg2$concordant <- cg[c(2,3,7,8)]
#cg2$discordant <- cg[c(1,4,6,5)]
#rowData(se)$cluster2 <-rowData(se)$cluster
#rowData(se)$cluster2[which((rowData(se)$cluster==2)|(rowData(se)$cluster==3)|(rowData(se)$cluster==7)|(rowData(se)$cluster==8))] <- 1
#rowData(se)$cluster2[which((rowData(se)$cluster==1)|(rowData(se)$cluster==4)|(rowData(se)$cluster==6)|(rowData(se)$cluster==5))] <- 2
sehm(se, degs2, do.scale = FALSE, anno_columns = c("Dataset","EXPO2"), toporder = "cluster", anno_rows = "cluster", breaks=0.99,show_rownames = F,sortRowsOn = F,gaps_at = "Dataset")
## Using assay logFC
#sehm(se, degs2, do.scale = FALSE, anno_columns = c("Dataset","EXPO2"), toporder = "cluster2", anno_rows = "cluster2", breaks=0.99,show_rownames = F,sortRowsOn = F)
# we isolate clusters that are coherent between the two systems
<- list(concordant=unlist(cg[c(2,3,7,8)]),
dc up.both=unlist(cg[c(3,8)]),
down.both=unlist(cg[c(2,7)]),
discordant=unlist(cg[c(1)]),
only_org=unlist(cg[c(4,6)]),
only_fetal=unlist(cg[c(5)]))
save(dc, file = "Data/DEGsCluster.RData")
<- getMsigSets("C5")
sets <- sets[grep("GO:BP",names(sets),fixed=TRUE)]
sets names(sets) <- gsub("C5:GO:BP:GO_","",names(sets),fixed=TRUE)
<- clusterEnrichment(cg, sets=sets, universe=row.names(se))
go #plotClusterEnrichment(go)
<- aggregate(assays(se)$logFC, by=list(cluster=rowData(se)$cluster), FUN=median)
ag row.names(ag) <- ag[,1]
<- SummarizedExperiment(list(logFC=ag[,-1]), colData=colData(se))
se.ag <- sechm(se.ag, row.names(se.ag), do.scale = FALSE, anno_columns = c("Dataset","EXPO2"), breaks=1, show_rownames = F, width=unit(5,"cm"), sortRowsOn=NULL, cluster_rows=FALSE, row_title="Clusters", gaps_at="Dataset") h1
## Using assay logFC
#h1
#m <- t(plotClusterEnrichment(go, returnMatrix=TRUE))
#colnames(m) <- breakStrings(tolower(gsub("_"," ",colnames(m))))
#h2 <- Heatmap(m, cluster_rows=FALSE, show_column_dend=FALSE, name="Enrichment")
#h1 + h2
<- goclassify(cg, sets) go2
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:IRanges':
##
## slice
#plot.goclassify(go2, n=16)
+ plot.goclassify(go2, n=16, transpose=TRUE, cluster_rows=FALSE, show_column_dend=FALSE) h1
load("Data/ASD.RData")
<- readRDS("Data/psychencode.DEGs.rds")
psychencode
<- lapply(ASD,FUN=function(x) gsub(" ","",x))
ASD <- intersect(unlist(dc[1]), unlist(ASD))
asd.degs sehm(se, asd.degs, do.scale = FALSE, anno_columns = c("EXPO2","Dataset"), toporder = "cluster", show_rownames = TRUE, breaks=0.995, main="ASD-associated consistent DEGs")
## Using assay logFC
<-unlist(dc[1])
consistentDEGs <- unlist(dc[4])
oppositeDEGs <- overlapper::multintersect(ll = list(consistentDEGs=consistentDEGs,oppositeDEGs=oppositeDEGs), ll2 = psychencode, universe = row.names(DEAs$chronic.fetal),two.tailed = F)
m
dotplot.multintersect(m, sizeRange = c(0,15), th=0.05)
<- goseq.enrichment(row.names(se), unlist(dc[1])) go
## Loading hg19 length data...
1:10,c(2,4,5,7)] go[
## Term
## 6702 extracellular matrix organization
## 10203 extracellular structure organization
## 2349 collagen trimer
## 7064 extracellular matrix
## 3301 cell adhesion
## 6531 biological adhesion
## 2210 extracellular matrix structural constituent
## 2526 integral component of plasma membrane
## 16742 GABA-ergic synapse
## 6599 extracellular matrix structural constituent conferring tensile strength
## Significant Enrichment FDR
## 6702 31 2.758347 4.527524e-05
## 10203 31 2.747357 4.527524e-05
## 2349 12 6.355638 6.494698e-05
## 7064 34 2.431900 2.317490e-04
## 3301 68 1.649555 3.594709e-04
## 6531 68 1.644176 3.594709e-04
## 2210 16 3.669234 9.156583e-04
## 2526 59 1.665532 1.019975e-03
## 16742 11 4.993715 1.611946e-03
## 6599 8 6.591032 2.670818e-03
<- mergeSEs(SEs, use.assays="logFC", do.scale = FALSE)
se2 sehm(se2[,grep("T3|BPA",colnames(se2),invert=TRUE)], intersect(degs$acute.fetal,degs2), do.scale = FALSE, anno_columns = c("Dataset","EXPO2"), breaks = 0.995, main="DEGs in acute exposure and at least one chronic system")
## Using assay logFC
sehm(se2[,grep("T3|BPA",colnames(se2),invert=TRUE)], intersect(degs$acute.fetal,unlist(dc[1])), do.scale = FALSE, anno_columns = c("Dataset","EXPO2"), breaks = 0.99, main=c("Consistent in chronic systems and\naltered upon acute exposure"))
## Using assay logFC
load("Data/regulon_weighted.RData")
<- lapply(regulon, FUN=function(x) intersect(row.names(se),names(x$tfmode)))
regulon <- t(sapply(regulon, FUN=function(x){
tf.en c( overlap.up=length(intersect(x, dc$up.both)),
enrichment.up=log2(length(intersect(x, dc$up.both))/(length(dc$up.both)*length(x)/length(row.names(se)))),
p.up=overlap.prob(x, dc$up.both, row.names(se)),
overlap.down=length(intersect(x, dc$down.both)),
enrichment.down=log2(length(intersect(x, dc$down.both))/(length(dc$down.both)*length(x)/length(row.names(se)))),
p.down=overlap.prob(x, dc$down.both, row.names(se))
)
}))<- tf.en[which(rowMax(tf.en[,grep("overlap",colnames(tf.en))])>1),]
tf.en <- as.data.frame(tf.en[order(rowMin(tf.en[,grep("^p\\.",colnames(tf.en))])),])
tf.en $gene <- row.names(tf.en)
tf.en$q.up <- p.adjust(tf.en$p.up, "fdr")
tf.en$q.down <- p.adjust(tf.en$p.down, "fdr")
tf.enggplot(tf.en, aes(enrichment.up, -log10(q.up), size=overlap.up)) + geom_point(alpha=0.3) + geom_text_repel(data=tf.en[which(tf.en$q.up<0.01),], mapping=aes(label=gene), show.legend = FALSE) + ggtitle("TF target enrichment among\nconsistently upregulated genes") + xlim(0,3)
## Warning: Removed 89 rows containing missing values (geom_point).
Viper-based:
<- mergeSEs(SEs[1:2], use.assays="logFC", do.scale = FALSE)
se <- se[,se$EXPO %in% c("CNT","1X","1000X")]
se
load("Data/regulon_weighted.RData")
<- viper(assays(SEs$chronic.org[,SEs$chronic.org$EXPO %in% c("CNT","1X","1000X")])$corrected, regulon, verbose=FALSE)
vi1 <- viper(assays(SEs$chronic.fetal[,SEs$chronic.fetal$EXPO %in% c("CNT","1X","1000X")])$corrected, regulon, verbose=FALSE)
vi2 <- intersect(row.names(vi1),row.names(vi2))
i <- cbind(vi1[i,], vi2[i,])
vi
$EXPO <- droplevels(se$EXPO)
se<- model.matrix(~se$Dataset+se$EXPO)
mm <- topTable(eBayes(lmFit(vi,mm)),coef = c("se$EXPO1X","se$EXPO1000X"), number=Inf)
tf.en2 head(tf.en2)
## se.EXPO1X se.EXPO1000X AveExpr F P.Value adj.P.Val
## ZNF274 0.111530901 0.15886522 0.06467018 28.37023 2.085620e-09 6.632270e-07
## ELK4 0.042128218 0.07863984 2.77828079 24.81498 1.378553e-08 2.191900e-06
## E2F4 0.304397815 0.23003400 6.99105996 23.99906 2.163410e-08 2.293215e-06
## KMT2A 0.016202562 -0.06656677 5.11525712 19.49077 2.970048e-07 2.341586e-05
## RFX5 -0.002103448 0.09231172 1.51665776 19.13818 3.681739e-07 2.341586e-05
## NFATC1 -0.020671478 -0.05257118 1.78263768 18.45771 5.597536e-07 2.966694e-05
head(tf.en2[intersect(row.names(tf.en2),degs2),])
## se.EXPO1X se.EXPO1000X AveExpr F P.Value adj.P.Val
## RFX5 -0.0021034484 0.09231172 1.5166578 19.13818 3.681739e-07 2.341586e-05
## FOXP1 -0.0229482485 0.05248375 -4.7117767 15.60527 3.463309e-06 7.542784e-05
## TCF3 0.1213764387 0.16144268 0.9298573 13.07448 1.924118e-05 2.396945e-04
## KLF3 0.0836005725 0.01179001 0.9214891 12.89313 2.184035e-05 2.572308e-04
## SOX11 -0.0009475132 -0.12550570 1.6198808 12.16980 3.639888e-05 3.858282e-04
## SIX5 0.0621833580 0.07971467 1.5984748 10.78340 9.932909e-05 8.477172e-04