Skip to content

Commit dfcb3ab

Browse files
authored
Merge pull request #18 from earmingol/dev
Implemented memory efficient smoothing
2 parents d2bf5f1 + 0032bff commit dfcb3ab

File tree

2 files changed

+30
-20
lines changed

2 files changed

+30
-20
lines changed

sccellfie/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@
1616
from .reaction_activity import (compute_reaction_activity)
1717
from .sccellfie_pipeline import (run_sccellfie_pipeline)
1818

19-
__version__ = "0.4.6"
19+
__version__ = "0.4.7"

sccellfie/expression/smoothing.py

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def get_smoothing_matrix(adata, mode, neighbors_key='neighbors'):
2222
and 'params' from the analysis.
2323
2424
Returns
25-
-------
25+
-------
2626
S : numpy.ndarray or scipy.sparse.csr_matrix
2727
The smoothing matrix S such that S @ X smoothes the signal X over neighbors.
2828
If the input matrix A is sparse, S will be returned as a scipy.sparse.csr_matrix.
@@ -42,16 +42,21 @@ def get_smoothing_matrix(adata, mode, neighbors_key='neighbors'):
4242
else:
4343
raise ValueError(f'unknown mode {mode}')
4444

45-
update = False
46-
if sp.issparse(A):
47-
A = A.toarray()
48-
update = True
4945
# Normalize the smoothing matrix
50-
norm_vec = A.sum(axis=1)
51-
norm_vec[norm_vec == 0] = 1 # Avoid division by zero
52-
S = A / norm_vec[:, np.newaxis]
53-
if update:
54-
S = sp.csr_matrix(S)
46+
if not sp.issparse(A):
47+
norm_vec = A.sum(axis=1)
48+
norm_vec[norm_vec == 0] = 1 # Avoid division by zero
49+
S = A / norm_vec[:, np.newaxis]
50+
else:
51+
# Memory-efficient sparse normalization
52+
A = A.tocsr()
53+
norm_vec = np.array(A.sum(axis=1)).flatten()
54+
norm_vec[norm_vec == 0] = 1 # Avoid division by zero
55+
56+
# Use sparse diagonal matrix for row-wise scaling
57+
D_inv = sp.diags(1.0 / norm_vec, format='csr')
58+
S = D_inv @ A
59+
5560
return S
5661

5762

@@ -131,7 +136,8 @@ def smooth_expression_knn(adata, key_added='smoothed_X', neighbors_key='neighbor
131136
if isinstance(X, sp.coo_matrix):
132137
X = X.tocsr()
133138

134-
smoothed_matrix = np.zeros(X.shape)
139+
# Collect smoothed chunks for memory efficiency
140+
smoothed_chunks = []
135141

136142
# Iterate over chunks of cells
137143
for i in tqdm(range(n_chunks), disable=disable_pbar, desc='Smoothing Expression'):
@@ -158,21 +164,25 @@ def smooth_expression_knn(adata, key_added='smoothed_X', neighbors_key='neighbor
158164

159165
# Compute the expression data purely based on cell neighbors
160166
chunk_smoothed = smoothing_mat @ chunk_expression
161-
if sp.issparse(X):
162-
chunk_smoothed = chunk_smoothed.toarray()
163-
X_ = X.toarray()
164-
else:
165-
X_ = X
166167

167168
# Extract the smoothed expression for the cells in the current chunk
168169
chunk_indices = np.arange(start_idx, end_idx)
169170
subset_mapping = dict(zip(chunk_and_neighbors, range(len(chunk_and_neighbors))))
170171
smoothed_chunk_indices = [subset_mapping[i] for i in chunk_indices]
171172

172173
# Smooth by alpha
173-
smoothed_matrix[chunk_indices, :] = (1. - alpha) * X_[chunk_indices, :] + alpha * chunk_smoothed[smoothed_chunk_indices, :]
174+
if sp.issparse(X):
175+
X_chunk = X[chunk_indices, :]
176+
smoothed_result = (1. - alpha) * X_chunk + alpha * chunk_smoothed[smoothed_chunk_indices, :]
177+
else:
178+
smoothed_result = (1. - alpha) * X[chunk_indices, :] + alpha * chunk_smoothed[smoothed_chunk_indices, :]
179+
180+
smoothed_chunks.append(smoothed_result)
174181

175182
# Store the smoothed expression matrix in adata.layers
176-
if sp.issparse(adata.X):
177-
smoothed_matrix = sp.csr_matrix(smoothed_matrix)
183+
if sp.issparse(X):
184+
smoothed_matrix = sp.vstack(smoothed_chunks, format='csr')
185+
else:
186+
smoothed_matrix = np.vstack(smoothed_chunks)
187+
178188
adata.layers[key_added] = smoothed_matrix

0 commit comments

Comments
 (0)