import scanpy as sc
import pandas as pd
from matplotlib import pylab
import random
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import itertools
import plotly.express as px
import numpy as np
import random
import anndata as ad
from scipy.sparse import csr_matrix, issparse
from scipy import sparse
from matplotlib.colors import TwoSlopeNorm
import scanpy.external as sce
import sys
import scvelo as scv
import anndata
anndata.__version__
'0.10.8'
sc.settings.set_figure_params(dpi=80, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (6, 6)
homeDir = os.getenv("HOME")
sys.path.insert(1, homeDir+"/utils/")
from PlotPCA_components import *
from PurgeAdata import *
DS="controls_substudy"
import anndata2ri
import rpy2.rinterface_lib.callbacks
import logging
anndata2ri.activate()
%load_ext rpy2.ipython
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
The rpy2.ipython extension is already loaded. To reload it, use: %reload_ext rpy2.ipython
adata = sc.read_h5ad("./adatas/3.Endpoints.{}.h5ad".format(DS))
adata.layers["counts"] = adata.X.copy()
sc.pp.normalize_total(adata, 20000)
sc.pp.log1p(adata)
HVGSlist = []
for line in adata.obs["line"].unique():
adataLine = adata[adata.obs["line"] == line].copy()
sc.pp.highly_variable_genes(adataLine, flavor="seurat", n_top_genes=3000, batch_key="replicate")
adataLine.var.highly_variable
HVGS = set(adataLine.var_names[adataLine.var.highly_variable_nbatches == len(adataLine.obs.replicate.unique().tolist())].tolist())
HVGSlist.append(HVGS)
HVGSlist = list(set.intersection(*HVGSlist))
adata.var["highly_variable"] = adata.var_names.isin(HVGSlist)
each run contains 1 condition as expected
sc.tl.pca(adata)
plotPCA_components(adata, color="line")
PlottingParams: figsize:(10, 10) dpi:100 dotsize:10 legend_loc:on data fontsize:8
sc.pl.pca(adata, color=["TOP2A","DCX","STMN2", "DLX6-AS1"], ncols=4, size=30, vmin="p1", vmax="p99")
scv.tl.score_genes_cell_cycle(adata)
calculating cell cycle phase --> 'S_score' and 'G2M_score', scores of cell cycle phases (adata.obs)
sc.pl.pca(adata, color="phase", size=40)
Strange no grouping of activemcycling cells
sc.pl.pca_variance_ratio(adata)
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=10)
sc.tl.umap(adata)
sc.pl.umap(adata, color=["TOP2A","DCX","STMN2", "DLX6-AS1","phase","HOPX","SLC17A6","SLC1A3","EOMES","ARHGAP11B"], ncols=4, size=30, vmin="p1", vmax="p99")
fig, axes = plt.subplots(1,len(adata.obs.line.unique()), figsize=(40, 10))
for line in enumerate(adata.obs.line.unique()):
SampleIDs = adata.obs.loc[adata.obs.line == line[1],"sample_id"].unique().tolist()
axes[line[0]] = sc.pl.umap(adata, size = 40, add_outline=True,ncols=2, color=["sample_id"],title="{} replicates".format(line[1]),na_color="white",
groups=SampleIDs, vmin='p1', vmax='p99', show=False, ax=axes[line[0]])
plt.subplots_adjust(wspace=.5)
fig.show()
LeidenDF = pd.DataFrame()
for i in np.linspace(.5,1, 6):
t = sc.tl.leiden(adata, resolution=i, copy =True, random_state=1)
LeidenDF = pd.concat([LeidenDF,t.obs["leiden"].to_frame(name=round(i,2))], axis = 1)
sc.tl.leiden(adata, resolution=.6, random_state=1)
sc.pl.umap(adata, color=["leiden","line"], size=40)
colorby = 0.6
LeidenDF["color"] = LeidenDF[colorby].astype(int)
fig = px.parallel_categories(LeidenDF,
dimensions=[i for i in LeidenDF.columns if "color" not in str(i)],
color_continuous_scale=px.colors.sequential.Jet,color="color",
width=1000,height=700)
fig.show()
sc.tl.rank_genes_groups(adata, method="wilcoxon", groupby="leiden")
sc.pl.rank_genes_groups_dotplot(adata, standard_scale="var", n_genes=10)
WARNING: dendrogram data not found (using key=dendrogram_leiden). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
sys.path.insert(1, homeDir+"/utils")
from plotenrich import *
from gprofiler import GProfiler
gp = GProfiler(return_dataframe=True)
for leiden in adata.obs["leiden"].unique().tolist():
DEGsLeiden = sc.get.rank_genes_groups_df(adata, group=leiden).set_index("names").head(200).index.tolist()
ontology = gp.profile(organism='hsapiens',
query=DEGsLeiden,
no_evidences=False,
background=adata.var_names.tolist())
ontology = ontology[(ontology.intersection_size > 10)]
if ontology.shape[0] > 0:
plot_enrich(ontology,save=False, title=f"Leiden:{leiden}")
plt.show()
PoliudakisRef = sc.read_h5ad("/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/cortex.Reference.h5ad")
RefVars = set(PoliudakisRef.var_names.tolist())
QueryVars = set(adata.var_names.tolist())
if RefVars != QueryVars:
print("Found mismatch between Ref and Query vars: will unify")
CommonGenes = list(RefVars.intersection(QueryVars))
ref_adata = PoliudakisRef[:,CommonGenes].copy()
query_adata = adata[:,CommonGenes].copy()
Found mismatch between Ref and Query vars: will unify
sc.tl.ingest(query_adata, ref_adata, obs="LabelTransfer")
# Add to mapped obs list
adata.obs["LabelTransfer"] = query_adata.obs["LabelTransfer"]
sc.pl.umap(adata, color=["LabelTransfer"], ncols=4, size=30, vmin="p1", vmax="p99")