@@ -22,7 +22,7 @@ def get_smoothing_matrix(adata, mode, neighbors_key='neighbors'):
2222 and 'params' from the analysis.
2323
2424 Returns
25- -------
25+ -------
2626 S : numpy.ndarray or scipy.sparse.csr_matrix
2727 The smoothing matrix S such that S @ X smoothes the signal X over neighbors.
2828 If the input matrix A is sparse, S will be returned as a scipy.sparse.csr_matrix.
@@ -42,16 +42,21 @@ def get_smoothing_matrix(adata, mode, neighbors_key='neighbors'):
4242 else :
4343 raise ValueError (f'unknown mode { mode } ' )
4444
45- update = False
46- if sp .issparse (A ):
47- A = A .toarray ()
48- update = True
4945 # Normalize the smoothing matrix
50- norm_vec = A .sum (axis = 1 )
51- norm_vec [norm_vec == 0 ] = 1 # Avoid division by zero
52- S = A / norm_vec [:, np .newaxis ]
53- if update :
54- S = sp .csr_matrix (S )
46+ if not sp .issparse (A ):
47+ norm_vec = A .sum (axis = 1 )
48+ norm_vec [norm_vec == 0 ] = 1 # Avoid division by zero
49+ S = A / norm_vec [:, np .newaxis ]
50+ else :
51+ # Memory-efficient sparse normalization
52+ A = A .tocsr ()
53+ norm_vec = np .array (A .sum (axis = 1 )).flatten ()
54+ norm_vec [norm_vec == 0 ] = 1 # Avoid division by zero
55+
56+ # Use sparse diagonal matrix for row-wise scaling
57+ D_inv = sp .diags (1.0 / norm_vec , format = 'csr' )
58+ S = D_inv @ A
59+
5560 return S
5661
5762
@@ -131,7 +136,8 @@ def smooth_expression_knn(adata, key_added='smoothed_X', neighbors_key='neighbor
131136 if isinstance (X , sp .coo_matrix ):
132137 X = X .tocsr ()
133138
134- smoothed_matrix = np .zeros (X .shape )
139+ # Collect smoothed chunks for memory efficiency
140+ smoothed_chunks = []
135141
136142 # Iterate over chunks of cells
137143 for i in tqdm (range (n_chunks ), disable = disable_pbar , desc = 'Smoothing Expression' ):
@@ -158,21 +164,25 @@ def smooth_expression_knn(adata, key_added='smoothed_X', neighbors_key='neighbor
158164
159165 # Compute the expression data purely based on cell neighbors
160166 chunk_smoothed = smoothing_mat @ chunk_expression
161- if sp .issparse (X ):
162- chunk_smoothed = chunk_smoothed .toarray ()
163- X_ = X .toarray ()
164- else :
165- X_ = X
166167
167168 # Extract the smoothed expression for the cells in the current chunk
168169 chunk_indices = np .arange (start_idx , end_idx )
169170 subset_mapping = dict (zip (chunk_and_neighbors , range (len (chunk_and_neighbors ))))
170171 smoothed_chunk_indices = [subset_mapping [i ] for i in chunk_indices ]
171172
172173 # Smooth by alpha
173- smoothed_matrix [chunk_indices , :] = (1. - alpha ) * X_ [chunk_indices , :] + alpha * chunk_smoothed [smoothed_chunk_indices , :]
174+ if sp .issparse (X ):
175+ X_chunk = X [chunk_indices , :]
176+ smoothed_result = (1. - alpha ) * X_chunk + alpha * chunk_smoothed [smoothed_chunk_indices , :]
177+ else :
178+ smoothed_result = (1. - alpha ) * X [chunk_indices , :] + alpha * chunk_smoothed [smoothed_chunk_indices , :]
179+
180+ smoothed_chunks .append (smoothed_result )
174181
175182 # Store the smoothed expression matrix in adata.layers
176- if sp .issparse (adata .X ):
177- smoothed_matrix = sp .csr_matrix (smoothed_matrix )
183+ if sp .issparse (X ):
184+ smoothed_matrix = sp .vstack (smoothed_chunks , format = 'csr' )
185+ else :
186+ smoothed_matrix = np .vstack (smoothed_chunks )
187+
178188 adata .layers [key_added ] = smoothed_matrix
0 commit comments