meyer-lab
diff --git a/‎pf2rnaseq/ParameterOpt.py‎
Lines changed: 60 additions & 54 deletions b/‎pf2rnaseq/ParameterOpt.py‎
Lines changed: 60 additions & 54 deletions
diff --git a/‎pf2rnaseq/factorization.py‎
Lines changed: 6 additions & 6 deletions b/‎pf2rnaseq/factorization.py‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎pf2rnaseq/figures/common.py‎
Lines changed: 5 additions & 4 deletions b/‎pf2rnaseq/figures/common.py‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎pf2rnaseq/figures/commonFuncs/plotFactors.py‎
Lines changed: 7 additions & 7 deletions b/‎pf2rnaseq/figures/commonFuncs/plotFactors.py‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎pf2rnaseq/figures/figureCITEseq1.py‎
Lines changed: 5 additions & 4 deletions b/‎pf2rnaseq/figures/figureCITEseq1.py‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎pf2rnaseq/figures/figureCITEseq2.py‎
Lines changed: 2 additions & 1 deletion b/‎pf2rnaseq/figures/figureCITEseq2.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎pf2rnaseq/figures/figureCITEseq3.py‎
Lines changed: 2 additions & 1 deletion b/‎pf2rnaseq/figures/figureCITEseq3.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎pf2rnaseq/figures/figureCITEseq4.py‎
Lines changed: 2 additions & 1 deletion b/‎pf2rnaseq/figures/figureCITEseq4.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎pf2rnaseq/figures/figureCITEseq5.py‎
Lines changed: 8 additions & 7 deletions b/‎pf2rnaseq/figures/figureCITEseq5.py‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎pf2rnaseq/figures/figureCITEseq6.py‎
Lines changed: 3 additions & 2 deletions b/‎pf2rnaseq/figures/figureCITEseq6.py‎
Lines changed: 3 additions & 2 deletions
@@ -2,43 +2,45 @@
 Hyperparameter sweep for Pf2 using Weights & Biases
 Optimizing rank and regularization parameter
 """
-import os
+
 import numpy as np
-import pandas as pd
-import anndata
 import wandb
-from tensorly.cp_tensor import CPTensor
-from tlviz.factor_tools import factor_match_score as fms
-import matplotlib.pyplot as plt
-import seaborn as sns
-
 from factorization import pf2
 from imports import import_cytokine
-
+from tensorly.cp_tensor import CPTensor
+from tlviz.factor_tools import factor_match_score as fms
 
 ranks = np.arange(1, 31)
 # Define the sweep configuration
 sweep_config = {
-    'method': 'grid',  # grid search for thorough exploration
-    'metric': {
-        'name': 'fms',  # optimize for factor match score
-        'goal': 'maximize'  # we want to maximize factor stability
+    "method": "grid",  # grid search for thorough exploration
+    "metric": {
+        "name": "fms",  # optimize for factor match score
+        "goal": "maximize",  # we want to maximize factor stability
     },
-    'parameters': {
-        'rank': {
-            'values': ranks  # Different component numbers to test
+    "parameters": {
+        "rank": {
+            "values": ranks  # Different component numbers to test
         },
-        'regParam': {
-            'values': [0.0, 1e-6, 1e-5, 5e-5, 1e-4]  # Different L1 regularization strengths
-        }
-    }
+        "regParam": {
+            "values": [
+                0.0,
+                1e-6,
+                1e-5,
+                5e-5,
+                1e-4,
+            ]  # Different L1 regularization strengths
+        },
+    },
 }
 
+
 def resample(data):
     """Return a bootstrap resample of ``data`` taken along its first axis.
 
     ``data.shape[0]`` integer positions are drawn with replacement from
     NumPy's global random state, ``data`` is indexed with them, and a copy
     of that selection is returned.
     """
     n_obs = data.shape[0]
     # Sampling with replacement: a position may be picked repeatedly or never.
     picks = np.random.randint(0, n_obs, size=n_obs)
     sampled = data[picks]
     return sampled.copy()
 
+
 def calculateFMS(A, B):
     """Calculates FMS between 2 factorizations"""
     A_factors = [A.uns["Pf2_A"], A.uns["Pf2_B"], A.varm["Pf2_C"]]
@@ -49,78 +51,82 @@ def calculateFMS(A, B):
 
     return fms(A_CP, B_CP, consider_weights=False, skip_mode=1)
 
+
 def calculate_sparsity(matrix, threshold=1e-6):
     """Return the fraction of entries of ``matrix`` that are near zero.
 
     An entry counts as near zero when its absolute value is strictly less
     than ``threshold``; the count is divided by ``matrix.size``.
     """
     # Boolean mask marking the entries whose magnitude falls below the cutoff.
     is_small = np.abs(matrix) < threshold
     return is_small.sum() / matrix.size
 
+
 def train():
     """Main training function for wandb sweep"""
     # Initialize a new wandb run
     with wandb.init() as run:
         # Get parameters from wandb
         config = wandb.config
-        
+
         # Load data (do this once per run to save time)
         X = import_cytokine()
         print(f"Running with rank={config.rank}, regParam={config.regParam}")
-        
+
         # Set number of bootstrap samples
         n_bootstrap = 3
-        
+
         # Run base factorization with current parameters
-        base_model, r2x = pf2(X, 
-                             rank=config.rank, 
-                             random_state=42, 
-                             doEmbedding=False, 
-                             regParam=config.regParam,
-                             r2x=True)
-        
-        
+        base_model, r2x = pf2(
+            X,
+            rank=config.rank,
+            random_state=42,
+            doEmbedding=False,
+            regParam=config.regParam,
+            r2x=True,
+        )
+
         sparsity_C = calculate_sparsity(base_model.varm["Pf2_C"])
-        
-        
+
         # Log R2X and sparsity metrics
-        wandb.log({
-            "r2x": r2x,
-           
-            "sparsity_C": sparsity_C
-            
-        })
-        
+        wandb.log({"r2x": r2x, "sparsity_C": sparsity_C})
+
         # Calculate FMS across bootstrap samples
         fms_scores = []
         for i in range(n_bootstrap):
             # Create bootstrap sample
             bootstrap_data = resample(X)
-            
+
             # Run factorization on bootstrap sample
-            bootstrap_model = pf2(bootstrap_data, 
-                                 rank=config.rank, 
-                                 random_state=i, 
-                                 doEmbedding=False, 
-                                 regParam=config.regParam)
-            
+            bootstrap_model = pf2(
+                bootstrap_data,
+                rank=config.rank,
+                random_state=i,
+                doEmbedding=False,
+                regParam=config.regParam,
+            )
+
             # Calculate FMS between base model and bootstrap model
             fms_score = calculateFMS(base_model, bootstrap_model)
             fms_scores.append(fms_score)
-            
+
             # Log individual bootstrap FMS
             wandb.log({f"fms_bootstrap_{i}": fms_score})
-        
+
         # Calculate and log average FMS
         avg_fms = np.mean(fms_scores)
         wandb.log({"fms": avg_fms})
-        
-        print(f"Completed run: rank={config.rank}, regParam={config.regParam}, R2X={r2x:.4f}, FMS={avg_fms:.4f}")
+
+        print(
+            f"Completed run: rank={config.rank}, regParam={config.regParam}, R2X={r2x:.4f}, FMS={avg_fms:.4f}"
+        )
+
 
 if __name__ == "__main__":
     # Initialize wandb
     wandb.login()
-    
+
     # Create the sweep
     sweep_id = wandb.sweep(sweep_config, project="Pf2_parameter_optimization2")
-    
+
     # Run the sweep
-    wandb.agent(sweep_id, function=train, count=None)  # Set count if you want to limit runs
+    wandb.agent(
+        sweep_id, function=train, count=None
+    )  # Set count if you want to limit runs
@@ -1,13 +1,13 @@
+import anndata
+import cupy
+import numpy as np
+import scipy.sparse as sps
 from pacmap import PaCMAP
-from sklearn.linear_model import LinearRegression
-from scipy.stats import gmean
 from parafac2.parafac2 import parafac2_nd, store_pf2
+from scipy.stats import gmean
 from sklearn.decomposition import PCA
-import anndata
-import scipy.sparse as sps
-import numpy as np
+from sklearn.linear_model import LinearRegression
 from tqdm import tqdm
-import cupy
 
 
 def correct_conditions(X: anndata.AnnData):
 
@@ -2,14 +2,15 @@
 This file contains functions that are used in multiple figures.
 """
 
-from string import ascii_letters
 import sys
 import time
-import seaborn as sns
+from string import ascii_letters
+
 import matplotlib
+import seaborn as sns
+from matplotlib import gridspec
+from matplotlib import pyplot as plt
 from matplotlib.figure import Figure
-from matplotlib import gridspec, pyplot as plt
-
 
 matplotlib.use("AGG")
 
 
@@ -1,20 +1,20 @@
-from typing import Optional
-from anndata import AnnData
+
 import numpy as np
 import pandas as pd
+import scipy.cluster.hierarchy as sch
 import seaborn as sns
+from anndata import AnnData
 from matplotlib import pyplot as plt
-import scipy.cluster.hierarchy as sch
-from matplotlib.patches import Patch
 from matplotlib.axes import Axes
+from matplotlib.patches import Patch
 
 cmap = sns.diverging_palette(240, 10, as_cmap=True)
 
 
 def plot_condition_factors(
     data: AnnData,
     ax: Axes,
-    cond_group_labels: Optional[pd.Series] = None,
+    cond_group_labels: pd.Series | None = None,
     groupConditions=False,
     cond="Condition",
 ):
@@ -82,8 +82,8 @@ def plot_condition_factors(
 def plot_condition_factors_groups(
     data: AnnData,
     ax: Axes,
-    cond_group_labels: Optional[pd.Series] = None,
-    subgroup_labels: Optional[pd.Series] = None,
+    cond_group_labels: pd.Series | None = None,
+    subgroup_labels: pd.Series | None = None,
     groupConditions=False,
     cond="Condition",
     main_group_title="Treatment",
 
@@ -3,18 +3,19 @@
 and ratio of condition components based on days
 """
 
+import anndata
+import numpy as np
 from anndata import read_h5ad
 from matplotlib.axes import Axes
-import anndata
-from .common import subplotLabel, getSetup
+
+from .common import getSetup, subplotLabel
 from .commonFuncs.plotFactors import (
     plot_condition_factors,
     plot_eigenstate_factors,
-    plot_gene_factors,
     plot_factor_weight,
+    plot_gene_factors,
 )
 from .commonFuncs.plotPaCMAP import plot_labels_pacmap
-import numpy as np
 
 
 def makeFigure():
 
@@ -3,9 +3,10 @@
 """
 
 from anndata import read_h5ad
+
 from .common import (
-    subplotLabel,
     getSetup,
+    subplotLabel,
 )
 from .commonFuncs.plotPaCMAP import plot_wp_pacmap, plot_wp_per_celltype
 
 
@@ -3,9 +3,10 @@
 """
 
 from anndata import read_h5ad
+
 from .common import (
-    subplotLabel,
     getSetup,
+    subplotLabel,
 )
 from .commonFuncs.plotPaCMAP import plot_gene_pacmap
 
 
@@ -3,9 +3,10 @@
 """
 
 from anndata import read_h5ad
+
 from .common import (
-    subplotLabel,
     getSetup,
+    subplotLabel,
 )
 from .commonFuncs.plotFactors import plot_gene_factors_partial
 
 
@@ -3,9 +3,10 @@
 """
 
 from anndata import read_h5ad
+
 from .common import (
-    subplotLabel,
     getSetup,
+    subplotLabel,
 )
 from .commonFuncs.plotFactors import bot_top_genes
 from .commonFuncs.plotGeneral import plot_avegene_per_celltype
@@ -19,16 +20,16 @@ def makeFigure():
     # Add subplot labels
     subplotLabel(ax)
 
-    #X = read_h5ad("/opt/pf2/CITEseq_fitted_annotated.h5ad", backed="r")
-    X = read_h5ad("/home/nicoleb/Pf2-scRNAseq-1/pf2rnaseq/Cytokine_Pf2_annotated_NB_031725.h5ad")
-
-
+    # X = read_h5ad("/opt/pf2/CITEseq_fitted_annotated.h5ad", backed="r")
+    X = read_h5ad(
+        "/home/nicoleb/Pf2-scRNAseq-1/pf2rnaseq/Cytokine_Pf2_annotated_NB_031725.h5ad"
+    )
 
-    comps = [1,12,30]
+    comps = [1, 12, 30]
     genes = bot_top_genes(X, cmp=comps[1], geneAmount=10)
 
     for i, gene in enumerate(genes):
         plot_avegene_per_celltype(X, gene, ax[i], cellType="CellType2")
-        #ax[1].get_legend().remove()
+        # ax[1].get_legend().remove()
 
     return f
@@ -2,9 +2,10 @@
 CITEseq: Cell type percentage per Leiden cluster per condition
 """
 
-from anndata import read_h5ad
-from .common import subplotLabel, getSetup
 import seaborn as sns
+from anndata import read_h5ad
+
+from .common import getSetup, subplotLabel
 from .commonFuncs.plotGeneral import cell_count_perc_df