meyer-lab
diff --git a/RISE/factorization.py
Lines changed: 118 additions & 3 deletions b/RISE/factorization.py
Lines changed: 118 additions & 3 deletions
diff --git a/RISE/figures/commonFuncs/plotFactors.py
Lines changed: 108 additions & 3 deletions b/RISE/figures/commonFuncs/plotFactors.py
Lines changed: 108 additions & 3 deletions
diff --git a/RISE/figures/commonFuncs/plotGeneral.py
Lines changed: 32 additions & 1 deletion b/RISE/figures/commonFuncs/plotGeneral.py
Lines changed: 32 additions & 1 deletion
@@ -11,7 +11,31 @@
 
 
 def correct_conditions(X: anndata.AnnData):
-    """Correct the conditions factors by overall read depth."""
+    """Correct the condition factors by normalizing for overall read depth.
+    
+    This function adjusts condition factors (stored in X.uns["Pf2_A"]) to account for
+    differences in sequencing depth across conditions. It uses linear regression to
+    model the relationship between total read counts and condition factor magnitudes,
+    then applies a correction.
+    
+    Parameters
+    ----------
+    X : anndata.AnnData
+        AnnData object containing RISE decomposition results. Must have:
+        - X.obs["condition_unique_idxs"]: 0-indexed condition assignments
+        - X.uns["Pf2_A"]: Condition factors from PARAFAC2 decomposition
+    
+    Returns
+    -------
+    numpy.ndarray
+        Corrected condition factors normalized by sequencing depth
+    
+    Examples
+    --------
+    >>> from RISE.factorization import pf2, correct_conditions
+    >>> X = pf2(adata, rank=20)
+    >>> corrected_factors = correct_conditions(X)
+    """
     sgIndex = X.obs["condition_unique_idxs"]
 
     counts = np.zeros((np.amax(sgIndex.to_numpy()) + 1, 1))
@@ -39,7 +63,61 @@ def pf2(
     tolerance=1e-9,
     max_iter: int = 500,
 ):
-    """Run Pf2 model and store results in anndata file"""
+    """Perform PARAFAC2 tensor decomposition on single-cell RNA-seq data.
+    
+    This is the main function for running RISE analysis. It decomposes the
+    multi-condition single-cell data into condition factors, eigen-state factors,
+    and gene factors, revealing patterns across experimental conditions.
+    
+    Parameters
+    ----------
+    X : anndata.AnnData
+        Preprocessed AnnData object containing single-cell RNA-seq data.
+        Must have X.obs["condition_unique_idxs"] indicating which condition
+        each cell belongs to (0-indexed).
+    rank : int
+        Number of components to extract. Determines the complexity of the
+        decomposition. Typically chosen based on variance explained and
+        Factor Match Score analysis (see plot_r2x and plot_fms_diff_ranks).
+    random_state : int, optional (default: 1)
+        Random seed for reproducibility of the decomposition.
+    doEmbedding : bool, optional (default: True)
+        If True, automatically computes PaCMAP embedding of cell projections
+        and stores in X.obsm["X_pf2_PaCMAP"]. This enables visualization
+        functions like plot_labels_pacmap.
+    tolerance : float, optional (default: 1e-9)
+        Convergence threshold for the optimization algorithm. Lower values
+        increase precision but may require more iterations.
+    max_iter : int, optional (default: 500)
+        Maximum number of iterations for the optimization algorithm.
+    
+    Returns
+    -------
+    anndata.AnnData
+        The input AnnData object with added RISE decomposition results:
+        
+        - X.uns["Pf2_weights"]: Component weights (shape: rank,)
+        - X.uns["Pf2_A"]: Condition factors (shape: n_conditions, rank)
+        - X.uns["Pf2_B"]: Eigen-state factors (shape: rank, rank)
+        - X.varm["Pf2_C"]: Gene factors (shape: n_genes, rank)
+        - X.obsm["projections"]: Cell projections (shape: n_cells, rank)
+        - X.obsm["weighted_projections"]: Weighted cell projections (shape: n_cells, rank)
+        - X.obsm["X_pf2_PaCMAP"]: PaCMAP embedding (shape: n_cells, 2) if doEmbedding=True
+    
+    Examples
+    --------
+    >>> from RISE.factorization import pf2
+    >>> # Perform decomposition with 20 components
+    >>> X = pf2(adata, rank=20, random_state=42)
+    >>> # Access results
+    >>> condition_factors = X.uns["Pf2_A"]
+    >>> gene_factors = X.varm["Pf2_C"]
+    
+    See Also
+    --------
+    rise_pca_r2x : Compute variance explained for different ranks
+    plot_fms_diff_ranks : Evaluate factor stability across ranks
+    """
     pf_out, _ = parafac2_nd(
         X, rank=rank, random_state=random_state, tol=tolerance, n_iter_max=max_iter
     )
@@ -54,7 +132,44 @@ def pf2(
 
 
 def rise_pca_r2x(X: anndata.AnnData, ranks):
-    """Run RISE/PCA on data and save R2X values"""
+    """Compute variance explained (R²X) for RISE and PCA across different ranks.
+    
+    This function evaluates how much variance in the data is explained by
+    RISE (PARAFAC2) and PCA decompositions at different component ranks.
+    Used to determine the optimal number of components for RISE analysis.
+    
+    Parameters
+    ----------
+    X : anndata.AnnData
+        Preprocessed AnnData object containing single-cell RNA-seq data.
+        Must have X.obs["condition_unique_idxs"] for RISE decomposition.
+    ranks : array-like of int
+        Array of rank values to test (e.g., [1, 5, 10, 15, 20, 25, 30]).
+        Each rank represents a different number of components.
+    
+    Returns
+    -------
+    tuple of numpy.ndarray
+        (rise_r2x, pca_r2x) where:
+        
+        - rise_r2x: Variance explained by RISE for each rank (shape: len(ranks),)
+        - pca_r2x: Variance explained by PCA for each rank (shape: len(ranks),)
+    
+    Examples
+    --------
+    >>> from RISE.factorization import rise_pca_r2x
+    >>> ranks = [1, 5, 10, 15, 20]
+    >>> rise_r2x, pca_r2x = rise_pca_r2x(adata, ranks)
+    >>> # Plot results
+    >>> import matplotlib.pyplot as plt
+    >>> plt.plot(ranks, rise_r2x, label='RISE')
+    >>> plt.plot(ranks, pca_r2x, label='PCA')
+    
+    See Also
+    --------
+    plot_r2x : Convenience function to plot variance explained
+    pf2 : Perform PARAFAC2 decomposition at chosen rank
+    """
     X = X.to_memory()
     XX = sps.csr_array(X.X)
 
 
@@ -22,7 +22,50 @@ def plot_condition_factors(
     color_key=None,
     group_cond=False,
 ):
-    """Plots condition factors"""
+    """Plot condition factors as a heatmap showing how conditions contribute to components.
+    
+    This visualization shows how each experimental condition (rows) contributes to
+    each RISE component (columns). High values indicate strong association between
+    a condition and a component's pattern. Log transformation and normalization
+    help reveal relative differences across conditions.
+    
+    Parameters
+    ----------
+    data : anndata.AnnData
+        AnnData object with RISE decomposition results. Must contain:
+        - data.uns["Pf2_A"]: Condition factors (n_conditions, rank)
+        - data.obs[cond]: Condition labels for each cell
+    ax : matplotlib.axes.Axes
+        Matplotlib axes object to plot on.
+    cond : str, optional (default: "Condition")
+        Name of column in data.obs containing condition labels.
+    log_transform : bool, optional (default: True)
+        If True, applies log10 transformation to condition factors before plotting.
+        This helps visualize differences when values span orders of magnitude.
+    cond_group_labels : pandas.Series, optional (default: None)
+        Series mapping conditions to group labels for colored row annotations.
+        Useful for grouping related conditions (e.g., drug classes, patient cohorts).
+    ThomsonNorm : bool, optional (default: False)
+        If True, normalizes factors using only control conditions (those containing 'CTRL').
+    color_key : list, optional (default: None)
+        Custom colors for condition group labels. If None, uses default palette.
+    group_cond : bool, optional (default: False)
+        If True and cond_group_labels provided, sorts conditions by group.
+    
+    Examples
+    --------
+    >>> from RISE.figures.commonFuncs.plotFactors import plot_condition_factors
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots(figsize=(8, 8))
+    >>> plot_condition_factors(adata, ax=ax, cond="Condition", log_transform=True)
+    >>> plt.tight_layout()
+    >>> plt.show()
+    
+    See Also
+    --------
+    plot_eigenstate_factors : Visualize eigen-state factors
+    plot_gene_factors : Visualize gene factors
+    """
     pd.set_option("display.max_rows", None)
     yt = pd.Series(np.unique(data.obs[cond]))
     X = np.array(data.uns["Pf2_A"])
@@ -94,7 +137,36 @@ def plot_condition_factors(
 
 
 def plot_eigenstate_factors(data: anndata.AnnData, ax: Axes):
-    """Plots Pf2 eigenstate factors"""
+    """Plot eigen-state factors as a heatmap showing cell state patterns.
+    
+    Eigen-state factors represent the underlying cell state patterns across components.
+    Each row represents an eigen-state (a summary of similar cells), and each column
+    represents a component. High values indicate strong association between a cell
+    state pattern and a component.
+    
+    Parameters
+    ----------
+    data : anndata.AnnData
+        AnnData object with RISE decomposition results. Must contain:
+        - data.uns["Pf2_B"]: Eigen-state factors (rank, rank)
+    ax : matplotlib.axes.Axes
+        Matplotlib axes object to plot on.
+    
+    Examples
+    --------
+    >>> from RISE.figures.commonFuncs.plotFactors import plot_eigenstate_factors
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots(figsize=(4, 4))
+    >>> plot_eigenstate_factors(adata, ax=ax)
+    >>> ax.set_ylabel("Eigen-state")
+    >>> plt.tight_layout()
+    >>> plt.show()
+    
+    See Also
+    --------
+    plot_condition_factors : Visualize condition factors
+    plot_gene_factors : Visualize gene factors
+    """
     rank = data.uns["Pf2_B"].shape[1]
     xticks = np.arange(1, rank + 1)
     X = data.uns["Pf2_B"]
@@ -115,7 +187,40 @@ def plot_eigenstate_factors(data: anndata.AnnData, ax: Axes):
 
 
 def plot_gene_factors(data: anndata.AnnData, ax: Axes, weight=0.08, trim=True):
-    """Plots Pf2 gene factors"""
+    """Plot gene factors as a heatmap showing which genes contribute to each component.
+    
+    This visualization reveals coordinated gene modules by showing which genes (rows)
+    are highly weighted in each component (columns). The weight parameter filters out
+    genes with low contributions, focusing on the most important genes for interpretation.
+    
+    Parameters
+    ----------
+    data : anndata.AnnData
+        AnnData object with RISE decomposition results. Must contain:
+        - data.varm["Pf2_C"]: Gene factors (n_genes, rank)
+    ax : matplotlib.axes.Axes
+        Matplotlib axes object to plot on.
+    weight : float, optional (default: 0.08)
+        Minimum absolute weight threshold for including genes. Genes with maximum
+        absolute weight below this value across all components are filtered out.
+        Higher values show fewer, more important genes.
+    trim : bool, optional (default: True)
+        If True, filters genes based on the weight parameter. If False, shows all genes.
+    
+    Examples
+    --------
+    >>> from RISE.figures.commonFuncs.plotFactors import plot_gene_factors
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots(figsize=(7, 8))
+    >>> plot_gene_factors(adata, ax=ax, weight=0.2, trim=True)
+    >>> plt.tight_layout()
+    >>> plt.show()
+    
+    See Also
+    --------
+    plot_condition_factors : Visualize condition factors
+    plot_gene_pacmap : Overlay gene expression on PaCMAP
+    """
     rank = data.varm["Pf2_C"].shape[1]
     X = np.array(data.varm["Pf2_C"])
     yt = data.var.index.values
 
@@ -8,7 +8,38 @@
 
 
 def plot_r2x(data, rank_vec, ax: Axes):
-    """Creates R2X plot for RISE tensor decomposition and pca"""
+    """Plot variance explained (R²X) for RISE and PCA across different ranks.
+    
+    This visualization helps determine the optimal number of components by showing
+    how variance explained increases with rank. The elbow point where the curve
+    flattens indicates a good balance between model complexity and explanatory power.
+    
+    Parameters
+    ----------
+    data : anndata.AnnData
+        Preprocessed AnnData object containing single-cell RNA-seq data.
+        Must have data.obs["condition_unique_idxs"] for RISE decomposition.
+    rank_vec : array-like of int
+        Array of rank values to test (e.g., [1, 5, 10, 15, 20, 25, 30]).
+        Each rank represents a different number of components.
+    ax : matplotlib.axes.Axes
+        Matplotlib axes object to plot on.
+    
+    Examples
+    --------
+    >>> from RISE.figures.commonFuncs.plotGeneral import plot_r2x
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots(figsize=(5, 5))
+    >>> ranks = [1, 5, 10, 15, 20, 25, 30]
+    >>> plot_r2x(adata, ranks, ax)
+    >>> plt.tight_layout()
+    >>> plt.show()
+    
+    See Also
+    --------
+    rise_pca_r2x : Underlying function that computes variance explained
+    plot_fms_diff_ranks : Evaluate factor stability across ranks
+    """
     r2xError = rise_pca_r2x(data, rank_vec)
     labelNames = ["Fit: RISE", "Fit: PCA"]
     colorDecomp = ["r", "b"]