Skip to content
Closed
21 changes: 7 additions & 14 deletions pf2rnaseq/factorization.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,28 @@
import anndata
import cupy
import numpy as np
import scipy.sparse as sps
from pacmap import PaCMAP
from sklearn.linear_model import LinearRegression
from scipy.stats import gmean
from parafac2.parafac2 import parafac2_nd, store_pf2
from scipy.stats import gmean
from sklearn.decomposition import PCA
import anndata
import scipy.sparse as sps
import numpy as np
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
import cupy


def correct_conditions(X: anndata.AnnData):
"""Correct the conditions factors by overall read depth. Ensures that weighting is not affected by cell count difference"""
# sgIndex = X.obs["condition_unique_idxs"]
sgIndex = X.obs["condition_unique_idxs"].cat.codes
counts = np.zeros((np.amax(sgIndex) + 1, 1))

cond_mean = gmean(X.uns["Pf2_A"], axis=1)

x_count = X.X.sum(axis=1)

for ii in range(counts.size):
counts[ii] = np.sum(x_count[X.obs["condition_unique_idxs"] == ii])

lr = LinearRegression()
lr.fit(counts, cond_mean.reshape(-1, 1))

counts_correct = lr.predict(counts)

return X.uns["Pf2_A"] / counts_correct


Expand All @@ -37,17 +32,15 @@ def pf2(
random_state=1,
doEmbedding: bool = True,
tolerance=1e-9,
regParam=0.0,
r2x=False,
):
cupy.cuda.Device(1).use()
cupy.cuda.Device(0).use()
pf_out, R2X = parafac2_nd(
X,
rank=rank,
random_state=random_state,
tol=tolerance,
n_iter_max=500,
l2=regParam,
)

X = store_pf2(X, pf_out)
Expand Down
130 changes: 124 additions & 6 deletions pf2rnaseq/figures/commonFuncs/plotGeneral.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import anndata
import numpy as np
import pandas as pd
import seaborn as sns
import scanpy as sc
from scipy.stats import ranksums
import anndata
from matplotlib.axes import Axes
from ...factorization import pf2_pca_r2x
import matplotlib.pyplot as plt
import scipy.sparse
import seaborn as sns
from matplotlib.axes import Axes
from tensorly.cp_tensor import CPTensor
from tlviz.factor_tools import factor_match_score as fms

from ...factorization import pf2, pf2_pca_r2x


def plot_r2x(data, rank_vec, ax: Axes):
"""Creates R2X plot for parafac2 tensor decomposition and pca"""

r2xError = pf2_pca_r2x(data, rank_vec)
labelNames = ["Fit: Pf2", "Fit: PCA"]
colorDecomp = ["r", "b"]
Expand Down Expand Up @@ -440,3 +442,119 @@ def plot_boxplot_gene_celltype(
ax.set(title=gene)
ax.set_xticks(ax.get_xticks())
ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=45)


def calculateFMS(A: anndata.AnnData, B: anndata.AnnData):
"""Calculates FMS between 2 factors"""
factors = [A.uns["Pf2_A"], A.uns["Pf2_B"], A.varm["Pf2_C"]]
A_CP = CPTensor(
(
A.uns["Pf2_weights"],
factors,
)
)

factors = [B.uns["Pf2_A"], B.uns["Pf2_B"], B.varm["Pf2_C"]]
B_CP = CPTensor(
(
B.uns["Pf2_weights"],
factors,
)
)

return fms(A_CP, B_CP, consider_weights=False, skip_mode=1) # type: ignore


def plot_fms_percent_drop(
X: anndata.AnnData,
ax: Axes,
percentList: np.ndarray,
runs: int,
rank: int = 30,
):
# Plots FMS score when percentage is removed from data
dataX = pf2(X, rank, doEmbedding=False)

fmsLists = []

for j in range(0, runs, 1):
scores = [1.0]

for i in percentList[1:]:
sampled_data: anndata.AnnData = sc.pp.subsample(
X, fraction=1 - (i / 100), random_state=j, copy=True
) # type: ignore
sampledX = pf2(sampled_data, rank, random_state=j + 2, doEmbedding=False)

fmsScore = calculateFMS(dataX, sampledX)
scores.append(fmsScore)

fmsLists.append(scores)

runsList_df = []
for i in range(0, runs):
for j in range(0, len(percentList)):
runsList_df.append(i)
percentList_df = []
for i in range(0, runs):
for j in range(0, len(percentList)):
percentList_df.append(percentList[j])
fmsList_df = []
for sublist in fmsLists:
fmsList_df += sublist
df = pd.DataFrame(
{
"Run": runsList_df,
"Percentage of Data Dropped": percentList_df,
"FMS": fmsList_df,
}
)

sns.lineplot(data=df, x="Percentage of Data Dropped", y="FMS", ax=ax)
ax.set_ylim(0, 1)


def resample(data: anndata.AnnData) -> anndata.AnnData:
"""Bootstrapping dataset"""
indices = np.random.randint(0, data.shape[0], size=(data.shape[0],))
data = data[indices].copy()
return data


def plot_fms_diff_ranks(
X: anndata.AnnData,
ax: Axes,
ranksList: list[int],
runs: int,
):
# Plots FMS when using different Pf2 components
fmsLists = []

for j in range(0, runs, 1):
scores = []
for i in ranksList:
dataX = pf2(X, rank=i, random_state=j, doEmbedding=False)

sampledX = pf2(resample(X), rank=i, random_state=j, doEmbedding=False)

fmsScore = calculateFMS(dataX, sampledX)
scores.append(fmsScore)
fmsLists.append(scores)

runsList_df = []
for i in range(0, runs):
for j in range(0, len(ranksList)):
runsList_df.append(i)
ranksList_df = []
for i in range(0, runs):
for j in range(0, len(ranksList)):
ranksList_df.append(ranksList[j])
fmsList_df = []
for sublist in fmsLists:
fmsList_df += sublist
df = pd.DataFrame(
{"Run": runsList_df, "Component": ranksList_df, "FMS": fmsList_df}
)

sns.lineplot(data=df, x="Component", y="FMS", ax=ax)
ax.set_ylim(0, 1)
15 changes: 15 additions & 0 deletions pf2rnaseq/figures/commonFuncs/plotPaCMAP.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,22 @@ def ds_show(result, ax):

def plot_gene_pacmap(gene: str, decompType: str, X: anndata.AnnData, ax: Axes):
"""Scatterplot of PaCMAP visualization weighted by gene"""

if gene not in X.var_names:
ax.text(
0.5,
0.5,
f"Gene '{gene}' not found in dataset",
ha="center",
va="center",
transform=ax.transAxes,
)
print(
f"ERROR: Gene '{gene}' not found. Available genes include: {list(X.var_names[:5])}..."
)
return ax
geneList = X[:, gene].X

if isinstance(geneList, spmatrix):
geneList = geneList.toarray()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
"""

from anndata import read_h5ad

from .common import (
subplotLabel,
getSetup,
subplotLabel,
)
from .commonFuncs.plotFactors import bot_top_genes
from .commonFuncs.plotGeneral import plot_avegene_per_celltype
Expand All @@ -19,16 +20,16 @@ def makeFigure():
# Add subplot labels
subplotLabel(ax)

#X = read_h5ad("/opt/pf2/CITEseq_fitted_annotated.h5ad", backed="r")
X = read_h5ad("/home/nicoleb/Pf2-scRNAseq-1/pf2rnaseq/Cytokine_Pf2_annotated_NB_031725.h5ad")


# X = read_h5ad("/opt/pf2/CITEseq_fitted_annotated.h5ad", backed="r")
X = read_h5ad(
"/home/nicoleb/Pf2-scRNAseq-1/pf2rnaseq/Cytokine_Pf2_annotated_NB_031725.h5ad"
)

comps = [1,12,30]
comps = [1, 12, 30]
genes = bot_top_genes(X, cmp=comps[1], geneAmount=10)

for i, gene in enumerate(genes):
plot_avegene_per_celltype(X, gene, ax[i], cellType="CellType2")
#ax[1].get_legend().remove()
# ax[1].get_legend().remove()

return f
29 changes: 0 additions & 29 deletions pf2rnaseq/figures/figureCITEseq2.py

This file was deleted.

28 changes: 0 additions & 28 deletions pf2rnaseq/figures/figureCITEseq3.py

This file was deleted.

28 changes: 0 additions & 28 deletions pf2rnaseq/figures/figureCITEseq4.py

This file was deleted.

Loading
Loading