1- from warnings import warn
1+ import numpy as np
22from anndata import AnnData
33from scipy import sparse
44from scipy .stats import rankdata
5- import numpy as np
5+
66
77def get_rankings (
88 data ,
@@ -29,49 +29,62 @@ def get_rankings(
2929 ranks : csr_matrix of shape (genes, cells)
3030 Sparse matrix of ranks.
3131 """
32-
33- # Accept either AnnData or matrix directly
32+ # Load matrix
3433 if isinstance (data , AnnData ):
3534 X = data .layers [layer ] if layer else data .X
3635 else :
3736 X = data
3837
3938 n_cells , n_genes = X .shape
4039
41- # Convert to array
42- is_sparse = sparse .issparse (X )
43- Xarr = X .toarray () if is_sparse else np .asarray (X )
40+ # Store COO components per cell in lists of arrays
41+ data_parts = []
42+ row_parts = []
43+ col_parts = []
4444
45- # Allocate vectors, at most max_rank entries per cell
46- n_cells , n_genes = X .shape
47- nnz_per_cell = max_rank
48- nnz_total = n_cells * nnz_per_cell
45+ for j in range (n_cells ):
46+ col = X [j , :]
47+ if sparse .issparse (col ):
48+ col = col .toarray ().ravel ()
49+ else :
50+ col = np .asarray (col , dtype = float )
4951
50- data = np .empty (nnz_total , dtype = np .int32 )
51- rows = np .empty (nnz_total , dtype = np .int32 )
52- cols = np .empty (nnz_total , dtype = np .int32 )
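52+ # Replace NaN with 0 and +/-inf with large finite values, in place (NOTE(review): copy=False may write into a dense float input - confirm)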
53+ np .nan_to_num (col , copy = False )
5354
54- #Calculate ranks, while keeping the matrix sparse
55- ptr = 0
56- for j in range (n_cells ):
57- col = Xarr [j , :].astype (float )
58- col [np .isnan (col )] = - np .inf
59- ranks = rankdata (- col , method = ties_method )
60- mask = ranks <= max_rank #mask out ranks to impose sparsity
61- idx = np .nonzero (mask )[0 ]
62- rks = ranks [idx ].astype (np .int32 )
63- n = len (idx )
64-
65- data [ptr :ptr + n ] = rks
66- rows [ptr :ptr + n ] = idx
67- cols [ptr :ptr + n ] = j
68- ptr += n
69-
70- # slice arrays to actual size
71- data = data [:ptr ]
72- rows = rows [:ptr ]
73- cols = cols [:ptr ]
74-
75- ranks_mat = sparse .coo_matrix ((data , (rows ,cols )), shape = (n_genes ,n_cells )).tocsr ()
76-
55+ # Only rank non-zero elements
56+ nz_idx = np .nonzero (col )[0 ]
57+ if len (nz_idx ) == 0 :
58+ continue
59+
60+ nz_vals = col [nz_idx ]
61+ ranks = rankdata (- nz_vals , method = ties_method ).astype (np .int32 )
62+
63+ keep_mask = ranks <= max_rank
64+ kept_idx = nz_idx [keep_mask ]
65+ kept_ranks = ranks [keep_mask ]
66+
67+ if len (kept_idx ) > max_rank :
68+ kept_idx = kept_idx [:max_rank ]
69+ kept_ranks = kept_ranks [:max_rank ]
70+
71+ n = len (kept_idx )
72+ if n == 0 :
73+ continue
74+
75+ # Collect this cell's COO triplets: rank values, gene (row) indices, cell (column) index
76+ data_parts .append (kept_ranks )
77+ row_parts .append (kept_idx )
78+ col_parts .append (np .full (n , j , dtype = np .int32 ))
79+
80+ # No cell contributed any ranked entry: return an empty (genes x cells) matrix
81+ if not data_parts :
82+ return sparse .csr_matrix ((n_genes , n_cells ), dtype = np .int32 )
83+
84+ # Concatenate arrays only once at the end
85+ data_arr = np .concatenate (data_parts ).astype (np .int32 )
86+ rows_arr = np .concatenate (row_parts ).astype (np .int32 )
87+ cols_arr = np .concatenate (col_parts ).astype (np .int32 )
88+
89+ ranks_mat = sparse .csr_matrix ((data_arr , (rows_arr , cols_arr )), shape = (n_genes , n_cells ))
7790 return ranks_mat
0 commit comments