Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions rabies/analysis_pkg/analysis_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,14 @@ class ComplementaryPCA(BaseInterface):
output_spec = ComplementaryPCAOutputSpec

def _run_interface(self, runtime):
"""
Run Complementary Principal Component Analysis (CPCA) on BOLD fMRI data using provided priors and parameters.

Loads preprocessed BOLD data and prior spatial maps, applies CPCA modeling to extract prior and extra spatial/temporal components, generates component figures, and saves results as NIfTI images and CSV files. Output files and report folder are set as attributes for downstream workflow use.

Returns:
runtime: The updated runtime object after processing.
"""
import os
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -298,6 +306,9 @@ def _run_interface(self, runtime):
return runtime

def _list_outputs(self):
"""
Return a dictionary mapping output names to their corresponding file paths or report folder for the ComplementaryPCA interface.
"""
return {'CPCA_prior_timecourse_csv': getattr(self, 'CPCA_prior_timecourse_csv'),
'CPCA_extra_timecourse_csv': getattr(self, 'CPCA_extra_timecourse_csv'),
'CPCA_prior_filename': getattr(self, 'CPCA_prior_filename'),
Expand Down
13 changes: 13 additions & 0 deletions rabies/analysis_pkg/analysis_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,19 @@ def dual_regression(all_IC_vectors, timeseries):
### The fMRI timeseries aren't assumed theoretically to be spatially centered, and
### this measure would be removing global signal variations which we are interested in.
### Thus we prefer to avoid this step here, despite modelling limitations.
"""
Perform dual regression to decompose timeseries data into spatial weights and component timecourses using provided independent component vectors.

Parameters:
all_IC_vectors (ndarray): Array of independent component spatial maps (components x voxels).
timeseries (ndarray): Array of observed timeseries data (voxels x timepoints).

Returns:
dict: Dictionary containing:
'C' (ndarray): Component timecourses (components x timepoints), variance-normalized.
'W' (ndarray): Spatial weights (voxels x components), variance-normalized.
'S' (ndarray): Scaling factors for each component.
"""
X = all_IC_vectors.T
Y = timeseries.T
# for one given volume, it's values can be expressed through a linear combination of the components
Expand Down
16 changes: 16 additions & 0 deletions rabies/analysis_pkg/analysis_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@

def init_analysis_wf(opts, commonspace_cr=False, name="analysis_wf"):

"""
Initialize and configure a Nipype workflow for neuroimaging data analysis based on specified options.

This function constructs a modular workflow that conditionally includes analysis steps such as seed-based functional connectivity, dual regression ICA, group ICA, complementary PCA, and functional connectivity matrix computation. The workflow manages subject- and group-level inputs, enforces prerequisites for certain analyses, and aggregates outputs for downstream processing.

Parameters:
opts: Configuration object containing analysis options and parameters.
commonspace_cr (bool): Indicates if confound regression outputs are in common space. Required for seed-based FC and group ICA.
name (str): Name assigned to the workflow.

Returns:
workflow: A Nipype Workflow object with nodes and connections reflecting the requested analyses.

Raises:
ValueError: If seed-based FC or group ICA is requested without common space confound regression outputs, or if any specified seed file does not exist.
"""
workflow = pe.Workflow(name=name)
subject_inputnode = pe.Node(niu.IdentityInterface(
fields=['dict_file', 'token']), name='subject_inputnode')
Expand Down
94 changes: 82 additions & 12 deletions rabies/analysis_pkg/cpca/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,23 @@


def spatial_cpca(X, q=1, c_init=None, W_prior=None, tol=1e-6, max_iter=200, verbose=1):
'''
Derives spatially orthogonal complementary components (first step of CPCA, i.e. spatial CPCA)
'''
"""
Extracts spatially orthogonal complementary components from a data matrix using the first step of Complementary Principal Component Analysis (CPCA).

Parameters:
X (ndarray): Data matrix of shape (time points, voxels).
q (int, optional): Number of spatial components to extract. Default is 1.
c_init (ndarray, optional): Initial spatial weights matrix of shape (voxels, components). If None, initialized randomly.
W_prior (ndarray, optional): Prior temporal weights matrix of shape (time points, components). If None, set to zeros.
tol (float, optional): Convergence tolerance. Default is 1e-6.
max_iter (int, optional): Maximum number of iterations. Default is 200.
verbose (int, optional): Verbosity level for logging progress.

Returns:
Cs (ndarray): Extracted spatial components (voxels × q).
Ws (ndarray): Corresponding temporal components (time points × q).
C_ (ndarray): Combined components matrix after update.
"""
# X: time by voxel matrix
# c_init: can specify an voxel by component number matrix for initiating weights

Expand Down Expand Up @@ -49,9 +63,27 @@ def spatial_cpca(X, q=1, c_init=None, W_prior=None, tol=1e-6, max_iter=200, verb
return Cs,Ws,C_

def cpca(X, C_prior, sequence = ['s','s','s','s'], verbose=False):
'''
CPCA algorithm: 1) spatial CPCA, 2) temporal CPCA with Wt correction, 3) derive final model
'''
"""
Performs Complementary Principal Component Analysis (CPCA) to extract spatial and temporal components from a data matrix.

This function iteratively derives spatial and temporal components according to a specified sequence, applies temporal correction to ensure orthogonality, normalizes component weights, and computes the final component matrices.

Parameters:
X (ndarray): Data matrix of shape (time points, voxels).
C_prior (ndarray): Prior spatial components matrix.
sequence (list of str, optional): List indicating the order and type of components to extract ('s' for spatial, 't' for temporal). Default is four spatial components.
verbose (bool, optional): If True, prints progress information.

Returns:
Cnet (ndarray): Corrected prior spatial components.
Wnet (ndarray): Corrected prior spatial weights.
Cs (ndarray): Extracted spatial components.
Ws (ndarray): Extracted spatial weights.
Ct (ndarray): Extracted temporal components.
Wt (ndarray): Extracted temporal weights.
C (ndarray): Final combined component matrix.
W (ndarray): Final combined weights matrix.
"""

q=1 # one component is derived at a time to have deterministic outputs

Expand Down Expand Up @@ -99,9 +131,25 @@ def cpca(X, C_prior, sequence = ['s','s','s','s'], verbose=False):


def cpca_quick(X, C_prior, sequence = ['s','s','s','s'], tol=1e-10, verbose=False):
'''
CPCA algorithm, but X is residualized for each spatial CPCA component to speed up convergence for next iterations
'''
"""
Performs CPCA with accelerated convergence by residualizing the data matrix after each spatial component extraction.

This function iteratively extracts spatial components from the data matrix, subtracting each extracted component's contribution before the next iteration. After all components are derived according to the specified sequence, temporal correction and normalization are applied to the resulting weights and components.

Parameters:
sequence (list of str): Specifies the order and type of components to extract ('s' for spatial, 't' for temporal).
tol (float): Convergence tolerance for spatial CPCA steps.

Returns:
Cnet (ndarray): Corrected prior spatial components.
Wnet (ndarray): Corrected prior spatial weights.
Cs (ndarray): Extracted spatial components.
Ws (ndarray): Extracted spatial weights.
Ct (ndarray): Extracted temporal components.
Wt (ndarray): Extracted temporal weights.
C (ndarray): Final combined component matrix.
W (ndarray): Final combined weight matrix.
"""

q=1 # one component is derived at a time to have deterministic outputs

Expand Down Expand Up @@ -153,9 +201,31 @@ def cpca_quick(X, C_prior, sequence = ['s','s','s','s'], tol=1e-10, verbose=Fals


def cpca_auto(X, C_prior, N_max, Wt_n='n', min_prior_sim=None, Dc_W_thresh=None, Dc_C_thresh=None):
'''
Conduct CPCA with automated estimation of n*. Wt_n sets a maximum for how many components are used for temporal CPCA.
'''
"""
Performs automated Complementary Principal Component Analysis (CPCA) with estimation of the optimal number of spatial components.

This function runs CPCA with up to `N_max` spatial components, uses a fitting report to determine the optimal number of components, and constructs the final model accordingly. The number of temporal components used can be limited by `Wt_n`. Returns the corrected prior components and weights, spatial and temporal components and weights, final combined matrices, and diagnostic figures.

Parameters:
X (np.ndarray): Data matrix (time points × voxels).
C_prior (np.ndarray): Prior spatial components matrix.
N_max (int): Maximum number of spatial components to consider.
Wt_n (int or 'n', optional): Maximum number of temporal components to use; if 'n', uses all available.
min_prior_sim (float, optional): Minimum similarity threshold for prior components in the fitting report.
Dc_W_thresh (float, optional): Threshold for temporal component diagnostics.
Dc_C_thresh (float, optional): Threshold for spatial component diagnostics.

Returns:
Cnet (np.ndarray): Corrected prior spatial components.
Wnet (np.ndarray): Corrected prior weights.
Cs (np.ndarray): Derived spatial components.
Ws (np.ndarray): Derived spatial weights.
Ct (np.ndarray): Derived temporal components.
Wt (np.ndarray): Derived temporal weights.
C (np.ndarray): Final combined components matrix.
W (np.ndarray): Final combined weights matrix.
fig_list (list): List of diagnostic figures from the fitting report.
"""

if Wt_n=='n':
Wt_n = N_max
Expand Down
81 changes: 74 additions & 7 deletions rabies/analysis_pkg/cpca/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@

def cosine_similarity(X,Y=None):
# estimate cosine similarity across the columns of X and Y combined
"""
Compute the cosine similarity matrix between columns of one or two input arrays.

If only X is provided, returns the cosine similarity among columns of X. If Y is provided, computes similarities among all columns of X and Y combined.

Returns:
Sc (ndarray): Square matrix of cosine similarities between columns.
"""
if Y is None:
arr = X.copy()
else:
Expand All @@ -15,12 +23,20 @@ def cosine_similarity(X,Y=None):


def gen_report(X,C_prior, Cs, min_prior_sim=None, Dc_W_thresh=None, Dc_C_thresh=None):
'''
Generate CPCA fitting report, and derive an optimal value for n* using minimal
prior similarity threshold, and/or thresholds on Dc_Wnet and/or Dc_Cnet.
If min_prior_sim=None, Dc_t_thresh=None, and Dc_s_thresh=None, then n_optim_idx
will be None (no optimal n* is derived).
'''
"""
Generates a CPCA fitting report and determines the optimal number of components based on similarity and distance thresholds.

Evaluates the CPCA fit using provided data and prior components, computes relevant metrics, and produces diagnostic plots. The optimal component count is selected according to user-specified thresholds for minimal prior similarity and cosine distances of spatial and temporal components. Returns the optimal index and a list of diagnostic figure objects.

Parameters:
min_prior_sim (float, optional): Minimum acceptable similarity to prior components for selecting the optimal component count.
Dc_W_thresh (float, optional): Cosine distance threshold for temporal components.
Dc_C_thresh (float, optional): Cosine distance threshold for spatial components.

Returns:
n_optim_idx (int or None): Index of the optimal number of components, or None if no thresholds are provided or met.
fig_list (list): List of matplotlib figure objects containing diagnostic plots.
"""

# generate the fitting report
Snet_s_l,Snet_t_l,prior_sim_l,Dc_Cnet,Dc_Wnet,R2_s,R2_t = evaluate_fit(X,C_prior,Cs)
Expand All @@ -37,6 +53,25 @@ def gen_report(X,C_prior, Cs, min_prior_sim=None, Dc_W_thresh=None, Dc_C_thresh=


def evaluate_fit(X,C_prior,Cs):
"""
Evaluate the quality of CPCA fitting across a range of component counts.

For each number of added candidate components, computes spatial and temporal network amplitudes, similarity to prior components, cosine distances between consecutive iterations for spatial and temporal components, and redundancy metrics (R2) for orthogonal and non-orthogonal CPCA. Returns lists and arrays of these metrics for further analysis and reporting.

Parameters:
X (ndarray): Data matrix to be decomposed.
C_prior (ndarray): Matrix of prior spatial components.
Cs (ndarray): Matrix of candidate spatial components.

Returns:
Snet_s_l (list): Spatial network amplitudes for orthogonal CPCA at each iteration.
Snet_t_l (list): Spatial network amplitudes for non-orthogonal CPCA at each iteration.
prior_sim_l (list): Cosine similarities between estimated and prior components at each iteration.
Dc_Cnet (ndarray): Cosine distances between consecutive spatial component estimates.
Dc_Wnet (ndarray): Cosine distances between consecutive temporal component estimates.
R2_s (ndarray): Redundancy (variance explained) for orthogonal CPCA at each iteration.
R2_t (ndarray): Redundancy (variance explained) for non-orthogonal CPCA at each iteration.
"""
n_prior = C_prior.shape[1]
N = Cs.shape[1]
C_prior_ = np.concatenate((C_prior, Cs),axis=1) # expand prior
Expand Down Expand Up @@ -127,6 +162,15 @@ def evaluate_fit(X,C_prior,Cs):


def optim_n(prior_sim_l,min_prior_sim,Dc_Cnet,Dc_Wnet,Dc_W_thresh,Dc_C_thresh):
"""
Determine the optimal number of components based on prior similarity and cosine distance thresholds.

Parameters:
prior_sim_l (array-like): Sequence of prior similarity values for each iteration.

Returns:
optim_idx (int or None): The index corresponding to the optimal number of components, or None if no thresholds are provided or met.
"""
if (min_prior_sim is None) and (Dc_W_thresh is None) and (Dc_C_thresh is None):
return None

Expand Down Expand Up @@ -164,7 +208,30 @@ def optim_n(prior_sim_l,min_prior_sim,Dc_Cnet,Dc_Wnet,Dc_W_thresh,Dc_C_thresh):
def plot_report(n_prior,N_max,n_optim_idx,min_prior_sim,Dc_W_thresh,Dc_C_thresh,prior_sim_l,
Snet_s_l,Snet_t_l,Dc_Cnet,Dc_Wnet,R2_s,R2_t):

fig_list = []
"""
Generate diagnostic plots for each prior component to visualize CPCA fitting metrics.

For each prior component, creates a 2x2 grid of plots showing network amplitude, prior similarity, component change via cosine distance, and redundancy metrics. Thresholds and the optimal component count are marked if provided.

Parameters:
n_prior (int): Number of prior components.
N_max (int): Maximum number of CPCA components considered.
n_optim_idx (int or None): Index of the optimal number of components, if determined.
min_prior_sim (float or None): Threshold for minimal prior similarity, if specified.
Dc_W_thresh (float or None): Cosine distance threshold for timecourse components, if specified.
Dc_C_thresh (float or None): Cosine distance threshold for spatial components, if specified.
prior_sim_l (ndarray): Array of prior similarity values across component counts.
Snet_s_l (ndarray): Array of spatial network amplitudes for orthogonal CPCA.
Snet_t_l (ndarray): Array of temporal network amplitudes for non-orthogonal CPCA.
Dc_Cnet (ndarray): Array of cosine distances for spatial components.
Dc_Wnet (ndarray): Array of cosine distances for timecourse components.
R2_s (ndarray): Redundancy metrics for orthogonal CPCA.
R2_t (ndarray): Redundancy metrics for non-orthogonal CPCA.

Returns:
fig_list (list): List of matplotlib Figure objects, one per prior component.
"""
fig_list = []
for prior_idx in range(n_prior):

fig,axes = plt.subplots(2,2, figsize=(8,8), constrained_layout=True)
Expand Down
Loading