11use anndata:: backend:: AttributeOp ;
22use anndata:: data:: index:: Interval ;
3- use anndata:: data:: { self , DataFrameIndex } ;
3+ use anndata:: data:: { DataFrameIndex } ;
44use anndata:: {
55 backend:: { DataContainer , DatasetOp , GroupOp , ScalarType } ,
66 data:: { DynCscMatrix , DynCsrMatrix , SelectInfoElem } ,
77 ArrayData , Backend ,
88} ;
99use nalgebra_sparse:: { pattern:: SparsityPattern , CscMatrix , CsrMatrix } ;
1010use ndarray:: Slice ;
11- use std:: { collections:: HashMap , mem :: replace } ;
11+ use std:: collections:: HashMap ;
1212
13+ use crate :: utils:: subset:: { subset_csr_internal} ;
1314use crate :: { LoadingConfig , LoadingStrategy } ;
1415
16+ mod subset;
17+
1518pub ( crate ) fn select_info_elem_to_indices (
1619 elem : & SelectInfoElem ,
1720 bound : usize ,
@@ -48,6 +51,11 @@ pub(crate) fn select_info_elem_to_indices(
4851 }
4952}
5053
54+ // ####################################################################################################
55+ // Subsetting
56+ // ####################################################################################################
57+
58+
5159pub ( crate ) fn subset_dyn_csc_matrix (
5260 dyn_csc : DynCscMatrix ,
5361 s : & [ & SelectInfoElem ] ,
@@ -113,107 +121,18 @@ fn subset_csr_matrix<T>(
113121 let nrows = matrix. nrows ( ) ;
114122 let ncols = matrix. ncols ( ) ;
115123
116- let row_indices = select_info_elem_to_indices ( s[ 0 ] , nrows) ?;
124+ let row_indices = crate :: utils :: select_info_elem_to_indices ( s[ 0 ] , nrows) ?;
117125 let col_indices = if s. len ( ) > 1 {
118- select_info_elem_to_indices ( s[ 1 ] , ncols) ?
126+ crate :: utils :: select_info_elem_to_indices ( s[ 1 ] , ncols) ?
119127 } else {
120128 ( 0 ..ncols) . collect ( )
121129 } ;
122130
123- if row_indices. len ( ) == nrows && col_indices. len ( ) == ncols {
124- return Ok ( matrix) ;
125- }
126-
131+ // Use matrix disassembly to get ownership of the data
127132 let ( row_offsets, col_indices_orig, values) = matrix. disassemble ( ) ;
128-
129- if col_indices. len ( ) == ncols {
130- let mut new_row_offsets = Vec :: with_capacity ( row_indices. len ( ) + 1 ) ;
131- let mut new_col_indices = Vec :: new ( ) ;
132- let mut new_values = Vec :: new ( ) ;
133- new_row_offsets. push ( 0 ) ;
134-
135- let mut values_iter = values. into_iter ( ) ;
136- let mut current_pos = 0 ;
137-
138- for & row_idx in & row_indices {
139- let start = row_offsets[ row_idx] ;
140- let end = row_offsets[ row_idx + 1 ] ;
141-
142- for _ in current_pos..start {
143- values_iter. next ( ) ;
144- }
145-
146- new_col_indices. extend_from_slice ( & col_indices_orig[ start..end] ) ;
147- for _ in start..end {
148- if let Some ( val) = values_iter. next ( ) {
149- new_values. push ( val) ;
150- }
151- }
152-
153- current_pos = end;
154- new_row_offsets. push ( new_col_indices. len ( ) ) ;
155- }
156-
157- let new_pattern = unsafe {
158- SparsityPattern :: from_offset_and_indices_unchecked (
159- row_indices. len ( ) ,
160- ncols,
161- new_row_offsets,
162- new_col_indices,
163- )
164- } ;
165-
166- return CsrMatrix :: try_from_pattern_and_values ( new_pattern, new_values)
167- . map_err ( |e| anyhow:: anyhow!( "Failed to create CSR matrix: {:?}" , e) ) ;
168- }
169-
170- let col_map: HashMap < usize , usize > = col_indices
171- . iter ( )
172- . enumerate ( )
173- . map ( |( new_idx, & old_idx) | ( old_idx, new_idx) )
174- . collect ( ) ;
175-
176- let capacity: usize = row_indices
177- . iter ( )
178- . flat_map ( |& row| {
179- let start = row_offsets[ row] ;
180- let end = row_offsets[ row + 1 ] ;
181- ( start..end) . filter ( |& idx| col_map. contains_key ( & col_indices_orig[ idx] ) )
182- } )
183- . count ( ) ;
184-
185- let mut new_row_offsets = Vec :: with_capacity ( row_indices. len ( ) + 1 ) ;
186- let mut new_col_indices = Vec :: with_capacity ( capacity) ;
187- let mut new_values = Vec :: with_capacity ( capacity) ;
188- new_row_offsets. push ( 0 ) ;
189-
190- let mut values_vec = values;
191-
192- for & row_idx in & row_indices {
193- let start = row_offsets[ row_idx] ;
194- let end = row_offsets[ row_idx + 1 ] ;
195-
196- for idx in start..end {
197- let col = col_indices_orig[ idx] ;
198- if let Some ( & new_col) = col_map. get ( & col) {
199- new_col_indices. push ( new_col) ;
200- new_values. push ( replace ( & mut values_vec[ idx] , unsafe { std:: mem:: zeroed ( ) } ) ) ;
201- }
202- }
203- new_row_offsets. push ( new_col_indices. len ( ) ) ;
204- }
205-
206- let new_pattern = unsafe {
207- SparsityPattern :: from_offset_and_indices_unchecked (
208- row_indices. len ( ) ,
209- col_indices. len ( ) ,
210- new_row_offsets,
211- new_col_indices,
212- )
213- } ;
214-
215- CsrMatrix :: try_from_pattern_and_values ( new_pattern, new_values)
216- . map_err ( |e| anyhow:: anyhow!( "Failed to create CSR matrix: {:?}" , e) )
133+
134+ // Call the internal optimized subset function
135+ subset_csr_internal ( row_offsets, col_indices_orig, values, & row_indices, & col_indices)
217136}
218137
219138fn subset_csc_matrix < T > (
@@ -397,7 +316,6 @@ pub fn read_array_as_usize_optimized<B: Backend>(
397316}
398317
399318pub fn read_array_as_usize < B : Backend > ( dataset : & B :: Dataset ) -> anyhow:: Result < Vec < usize > > {
400- println ! ( "Dtype: {}" , dataset. dtype( ) ?) ;
401319 match dataset. dtype ( ) ? {
402320 ScalarType :: U64 => {
403321 let arr = dataset. read_array :: < u64 , ndarray:: Ix1 > ( ) ?;
@@ -544,7 +462,6 @@ pub fn build_csr_matrix<T>(
544462where
545463 CsrMatrix < T > : Into < ArrayData > ,
546464{
547- // Use unsafe constructor since we trust the data from AnnData
548465 let pattern = unsafe {
549466 SparsityPattern :: from_offset_and_indices_unchecked ( nrows, ncols, indptr, indices)
550467 } ;
@@ -553,6 +470,24 @@ where
553470 Ok ( csr. into ( ) )
554471}
555472
473+ pub fn build_csc_matrix < T > (
474+ nrows : usize ,
475+ ncols : usize ,
476+ indptr : Vec < usize > ,
477+ indices : Vec < usize > ,
478+ data : Vec < T > ,
479+ ) -> anyhow:: Result < ArrayData >
480+ where
481+ CscMatrix < T > : Into < ArrayData > ,
482+ {
483+ let pattern = unsafe {
484+ SparsityPattern :: from_offset_and_indices_unchecked ( nrows, ncols, indptr, indices)
485+ } ;
486+ let csc = CscMatrix :: try_from_pattern_and_values ( pattern, data)
487+ . map_err ( |e| anyhow:: anyhow!( "Building the CSC matrix encountered an error: {}" , e) ) ?;
488+ Ok ( csc. into ( ) )
489+ }
490+
556491pub fn read_dataframe_index (
557492 container : & DataContainer < anndata_hdf5:: H5 > ,
558493) -> anyhow:: Result < DataFrameIndex > {
0 commit comments