meyer-lab · nbedanova · May 15, 2025 · May 15, 2025 · May 15, 2025 · May 21, 2025
diff --git a/pf2rnaseq/factorization.py b/pf2rnaseq/factorization.py
@@ -1,33 +1,28 @@
+import anndata
+import cupy
+import numpy as np
+import scipy.sparse as sps
 from pacmap import PaCMAP
-from sklearn.linear_model import LinearRegression
-from scipy.stats import gmean
 from parafac2.parafac2 import parafac2_nd, store_pf2
+from scipy.stats import gmean
 from sklearn.decomposition import PCA
-import anndata
-import scipy.sparse as sps
-import numpy as np
+from sklearn.linear_model import LinearRegression
 from tqdm import tqdm
-import cupy
 
 
 def correct_conditions(X: anndata.AnnData):
     """Correct the conditions factors by overall read depth. Ensures that weighting is not affected by cell count difference"""
     # sgIndex = X.obs["condition_unique_idxs"]
     sgIndex = X.obs["condition_unique_idxs"].cat.codes
     counts = np.zeros((np.amax(sgIndex) + 1, 1))
-
     cond_mean = gmean(X.uns["Pf2_A"], axis=1)
-
     x_count = X.X.sum(axis=1)
-
     for ii in range(counts.size):
         counts[ii] = np.sum(x_count[X.obs["condition_unique_idxs"] == ii])
 
     lr = LinearRegression()
     lr.fit(counts, cond_mean.reshape(-1, 1))
-
     counts_correct = lr.predict(counts)
-
     return X.uns["Pf2_A"] / counts_correct
 
 
@@ -37,17 +32,15 @@ def pf2(
     random_state=1,
     doEmbedding: bool = True,
     tolerance=1e-9,
-    regParam=0.0,
     r2x=False,
 ):
-    cupy.cuda.Device(1).use()
+    cupy.cuda.Device(0).use()
     pf_out, R2X = parafac2_nd(
         X,
         rank=rank,
         random_state=random_state,
         tol=tolerance,
         n_iter_max=500,
-        l2=regParam,
     )
 
     X = store_pf2(X, pf_out)

diff --git a/pf2rnaseq/figures/commonFuncs/plotGeneral.py b/pf2rnaseq/figures/commonFuncs/plotGeneral.py
@@ -1,17 +1,19 @@
+import anndata
 import numpy as np
 import pandas as pd
-import seaborn as sns
 import scanpy as sc
-from scipy.stats import ranksums
-import anndata
-from matplotlib.axes import Axes
-from ...factorization import pf2_pca_r2x
-import matplotlib.pyplot as plt
 import scipy.sparse
+import seaborn as sns
+from matplotlib.axes import Axes
+from tensorly.cp_tensor import CPTensor
+from tlviz.factor_tools import factor_match_score as fms
+
+from ...factorization import pf2, pf2_pca_r2x
 
 
 def plot_r2x(data, rank_vec, ax: Axes):
     """Creates R2X plot for parafac2 tensor decomposition and pca"""
+
     r2xError = pf2_pca_r2x(data, rank_vec)
     labelNames = ["Fit: Pf2", "Fit: PCA"]
     colorDecomp = ["r", "b"]
@@ -440,3 +442,119 @@ def plot_boxplot_gene_celltype(
     ax.set(title=gene)
     ax.set_xticks(ax.get_xticks())
     ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=45)
+
+
+def calculateFMS(A: anndata.AnnData, B: anndata.AnnData):
+    """Calculates FMS between 2 factors"""
+    factors = [A.uns["Pf2_A"], A.uns["Pf2_B"], A.varm["Pf2_C"]]
+    A_CP = CPTensor(
+        (
+            A.uns["Pf2_weights"],
+            factors,
+        )
+    )
+
+    factors = [B.uns["Pf2_A"], B.uns["Pf2_B"], B.varm["Pf2_C"]]
+    B_CP = CPTensor(
+        (
+            B.uns["Pf2_weights"],
+            factors,
+        )
+    )
+
+    return fms(A_CP, B_CP, consider_weights=False, skip_mode=1)  # type: ignore
+
+
+def plot_fms_percent_drop(
+    X: anndata.AnnData,
+    ax: Axes,
+    percentList: np.ndarray,
+    runs: int,
+    rank: int = 30,
+):
+    # Plots FMS score when percentage is removed from data
+    dataX = pf2(X, rank, doEmbedding=False)
+
+    fmsLists = []
+
+    for j in range(0, runs, 1):
+        scores = [1.0]
+
+        for i in percentList[1:]:
+            sampled_data: anndata.AnnData = sc.pp.subsample(
+                X, fraction=1 - (i / 100), random_state=j, copy=True
+            )  # type: ignore
+            sampledX = pf2(sampled_data, rank, random_state=j + 2, doEmbedding=False)
+
+            fmsScore = calculateFMS(dataX, sampledX)
+            scores.append(fmsScore)
+
+        fmsLists.append(scores)
+
+    runsList_df = []
+    for i in range(0, runs):
+        for j in range(0, len(percentList)):
+            runsList_df.append(i)
+    percentList_df = []
+    for i in range(0, runs):
+        for j in range(0, len(percentList)):
+            percentList_df.append(percentList[j])
+    fmsList_df = []
+    for sublist in fmsLists:
+        fmsList_df += sublist
+    df = pd.DataFrame(
+        {
+            "Run": runsList_df,
+            "Percentage of Data Dropped": percentList_df,
+            "FMS": fmsList_df,
+        }
+    )
+
+    sns.lineplot(data=df, x="Percentage of Data Dropped", y="FMS", ax=ax)
+    ax.set_ylim(0, 1)
+
+
+def resample(data: anndata.AnnData) -> anndata.AnnData:
+    """Bootstrapping dataset"""
+    indices = np.random.randint(0, data.shape[0], size=(data.shape[0],))
+    data = data[indices].copy()
+    return data
+
+
+def plot_fms_diff_ranks(
+    X: anndata.AnnData,
+    ax: Axes,
+    ranksList: list[int],
+    runs: int,
+):
+    # Plots FMS when using different Pf2 components
+    fmsLists = []
+
+    for j in range(0, runs, 1):
+        scores = []
+        for i in ranksList:
+            dataX = pf2(X, rank=i, random_state=j, doEmbedding=False)
+
+            sampledX = pf2(resample(X), rank=i, random_state=j, doEmbedding=False)
+
+            fmsScore = calculateFMS(dataX, sampledX)
+            scores.append(fmsScore)
+        fmsLists.append(scores)
+
+    runsList_df = []
+    for i in range(0, runs):
+        for j in range(0, len(ranksList)):
+            runsList_df.append(i)
+    ranksList_df = []
+    for i in range(0, runs):
+        for j in range(0, len(ranksList)):
+            ranksList_df.append(ranksList[j])
+    fmsList_df = []
+    for sublist in fmsLists:
+        fmsList_df += sublist
+    df = pd.DataFrame(
+        {"Run": runsList_df, "Component": ranksList_df, "FMS": fmsList_df}
+    )
+
+    sns.lineplot(data=df, x="Component", y="FMS", ax=ax)
+    ax.set_ylim(0, 1)
diff --git a/pf2rnaseq/figures/commonFuncs/plotPaCMAP.py b/pf2rnaseq/figures/commonFuncs/plotPaCMAP.py
@@ -46,7 +46,22 @@ def ds_show(result, ax):
 
 def plot_gene_pacmap(gene: str, decompType: str, X: anndata.AnnData, ax: Axes):
     """Scatterplot of PaCMAP visualization weighted by gene"""
+
+    if gene not in X.var_names:
+        ax.text(
+            0.5,
+            0.5,
+            f"Gene '{gene}' not found in dataset",
+            ha="center",
+            va="center",
+            transform=ax.transAxes,
+        )
+        print(
+            f"ERROR: Gene '{gene}' not found. Available genes include: {list(X.var_names[:5])}..."
+        )
+        return ax
     geneList = X[:, gene].X
+
     if isinstance(geneList, spmatrix):
         geneList = geneList.toarray()
 

diff --git a/pf2rnaseq/figures/figureCITEseq5.py → pf2rnaseq/figures/figureAveGene.py b/pf2rnaseq/figures/figureCITEseq5.py → pf2rnaseq/figures/figureAveGene.py
@@ -3,9 +3,10 @@
 """
 
 from anndata import read_h5ad
+
 from .common import (
-    subplotLabel,
     getSetup,
+    subplotLabel,
 )
 from .commonFuncs.plotFactors import bot_top_genes
 from .commonFuncs.plotGeneral import plot_avegene_per_celltype
@@ -19,16 +20,16 @@ def makeFigure():
     # Add subplot labels
     subplotLabel(ax)
 
-    #X = read_h5ad("/opt/pf2/CITEseq_fitted_annotated.h5ad", backed="r")
-    X = read_h5ad("/home/nicoleb/Pf2-scRNAseq-1/pf2rnaseq/Cytokine_Pf2_annotated_NB_031725.h5ad")
-
-
+    # X = read_h5ad("/opt/pf2/CITEseq_fitted_annotated.h5ad", backed="r")
+    X = read_h5ad(
+        "/home/nicoleb/Pf2-scRNAseq-1/pf2rnaseq/Cytokine_Pf2_annotated_NB_031725.h5ad"
+    )
 
-    comps = [1,12,30]
+    comps = [1, 12, 30]
     genes = bot_top_genes(X, cmp=comps[1], geneAmount=10)
 
     for i, gene in enumerate(genes):
         plot_avegene_per_celltype(X, gene, ax[i], cellType="CellType2")
-        #ax[1].get_legend().remove()
+        # ax[1].get_legend().remove()
 
     return f
diff --git a/pf2rnaseq/figures/figureCITEseq2.py b/pf2rnaseq/figures/figureCITEseq2.py
diff --git a/pf2rnaseq/figures/figureCITEseq3.py b/pf2rnaseq/figures/figureCITEseq3.py
diff --git a/pf2rnaseq/figures/figureCITEseq4.py b/pf2rnaseq/figures/figureCITEseq4.py