@@ -28,6 +28,7 @@ pub async fn layout_stream_from_reader<T: VortexReadAt + Unpin>(
2828 reader : T ,
2929 projection : Projection ,
3030 row_filter : Option < RowFilter > ,
31+ indices : Option < Array > ,
3132) -> VortexResult < LayoutBatchStream < T > > {
3233 let mut builder = LayoutBatchStreamBuilder :: new (
3334 reader,
@@ -42,15 +43,20 @@ pub async fn layout_stream_from_reader<T: VortexReadAt + Unpin>(
4243 builder = builder. with_row_filter ( row_filter) ;
4344 }
4445
46+ if let Some ( indices) = indices {
47+ builder = builder. with_indices ( indices) ;
48+ }
49+
4550 builder. build ( ) . await
4651}
4752
4853pub async fn read_array_from_reader < T : VortexReadAt + Unpin + ' static > (
4954 reader : T ,
5055 projection : Projection ,
5156 row_filter : Option < RowFilter > ,
57+ indices : Option < Array > ,
5258) -> VortexResult < Array > {
53- layout_stream_from_reader ( reader, projection, row_filter)
59+ layout_stream_from_reader ( reader, projection, row_filter, indices )
5460 . await ?
5561 . read_all ( )
5662 . await
@@ -119,11 +125,13 @@ impl TokioFileDataset {
119125 & self ,
120126 columns : Option < Vec < Bound < ' _ , PyAny > > > ,
121127 row_filter : Option < & Bound < ' _ , PyExpr > > ,
128+ indices : Option < & PyArray > ,
122129 ) -> PyResult < PyArray > {
123130 let inner = read_array_from_reader (
124131 self . file ( ) . await ?,
125132 projection_from_python ( columns) ?,
126133 row_filter_from_python ( row_filter) ,
134+ indices. map ( PyArray :: unwrap) . cloned ( ) ,
127135 )
128136 . await ?;
129137 Ok ( PyArray :: new ( inner) )
@@ -133,11 +141,13 @@ impl TokioFileDataset {
133141 self_ : PyRef < ' _ , Self > ,
134142 columns : Option < Vec < Bound < ' _ , PyAny > > > ,
135143 row_filter : Option < & Bound < ' _ , PyExpr > > ,
144+ indices : Option < & PyArray > ,
136145 ) -> PyResult < PyObject > {
137146 let layout_reader = layout_stream_from_reader (
138147 self_. file ( ) . await ?,
139148 projection_from_python ( columns) ?,
140149 row_filter_from_python ( row_filter) ,
150+ indices. map ( PyArray :: unwrap) . cloned ( ) ,
141151 )
142152 . await ?;
143153
@@ -154,23 +164,25 @@ impl TokioFileDataset {
154164 self_. schema . clone ( ) . to_pyarrow ( self_. py ( ) )
155165 }
156166
157- #[ pyo3( signature = ( * , columns= None , row_filter= None ) ) ]
167+ #[ pyo3( signature = ( * , columns = None , row_filter = None , indices = None ) ) ]
158168 pub fn to_array (
159169 & self ,
160170 columns : Option < Vec < Bound < ' _ , PyAny > > > ,
161171 row_filter : Option < & Bound < ' _ , PyExpr > > ,
172+ indices : Option < & PyArray > ,
162173 ) -> PyResult < PyArray > {
163- TOKIO_RUNTIME . block_on ( self . async_to_array ( columns, row_filter) )
174+ TOKIO_RUNTIME . block_on ( self . async_to_array ( columns, row_filter, indices ) )
164175 }
165176
166- #[ pyo3( signature = ( * , columns= None , row_filter= None ) ) ]
177+ #[ pyo3( signature = ( * , columns = None , row_filter = None , indices = None ) ) ]
167178 pub fn to_record_batch_reader (
168179 self_ : PyRef < Self > ,
169180 columns : Option < Vec < Bound < ' _ , PyAny > > > ,
170181 row_filter : Option < & Bound < ' _ , PyExpr > > ,
182+ indices : Option < & PyArray > ,
171183 ) -> PyResult < PyObject > {
172184 TOKIO_RUNTIME . block_on ( Self :: async_to_record_batch_reader (
173- self_, columns, row_filter,
185+ self_, columns, row_filter, indices ,
174186 ) )
175187 }
176188}
@@ -197,11 +209,13 @@ impl ObjectStoreUrlDataset {
197209 & self ,
198210 columns : Option < Vec < Bound < ' _ , PyAny > > > ,
199211 row_filter : Option < & Bound < ' _ , PyExpr > > ,
212+ indices : Option < & PyArray > ,
200213 ) -> PyResult < PyArray > {
201214 let inner = read_array_from_reader (
202215 self . reader ( ) . await ?,
203216 projection_from_python ( columns) ?,
204217 row_filter_from_python ( row_filter) ,
218+ indices. map ( PyArray :: unwrap) . cloned ( ) ,
205219 )
206220 . await ?;
207221 Ok ( PyArray :: new ( inner) )
@@ -211,11 +225,13 @@ impl ObjectStoreUrlDataset {
211225 self_ : PyRef < ' _ , Self > ,
212226 columns : Option < Vec < Bound < ' _ , PyAny > > > ,
213227 row_filter : Option < & Bound < ' _ , PyExpr > > ,
228+ indices : Option < & PyArray > ,
214229 ) -> PyResult < PyObject > {
215230 let layout_reader = layout_stream_from_reader (
216231 self_. reader ( ) . await ?,
217232 projection_from_python ( columns) ?,
218233 row_filter_from_python ( row_filter) ,
234+ indices. map ( PyArray :: unwrap) . cloned ( ) ,
219235 )
220236 . await ?;
221237
@@ -232,23 +248,25 @@ impl ObjectStoreUrlDataset {
232248 self_. schema . clone ( ) . to_pyarrow ( self_. py ( ) )
233249 }
234250
235- #[ pyo3( signature = ( * , columns= None , row_filter= None ) ) ]
251+ #[ pyo3( signature = ( * , columns = None , row_filter = None , indices = None ) ) ]
236252 pub fn to_array (
237253 & self ,
238254 columns : Option < Vec < Bound < ' _ , PyAny > > > ,
239255 row_filter : Option < & Bound < ' _ , PyExpr > > ,
256+ indices : Option < & PyArray > ,
240257 ) -> PyResult < PyArray > {
241- TOKIO_RUNTIME . block_on ( self . async_to_array ( columns, row_filter) )
258+ TOKIO_RUNTIME . block_on ( self . async_to_array ( columns, row_filter, indices ) )
242259 }
243260
244- #[ pyo3( signature = ( * , columns= None , row_filter= None ) ) ]
261+ #[ pyo3( signature = ( * , columns = None , row_filter = None , indices = None ) ) ]
245262 pub fn to_record_batch_reader (
246263 self_ : PyRef < Self > ,
247264 columns : Option < Vec < Bound < ' _ , PyAny > > > ,
248265 row_filter : Option < & Bound < ' _ , PyExpr > > ,
266+ indices : Option < & PyArray > ,
249267 ) -> PyResult < PyObject > {
250268 TOKIO_RUNTIME . block_on ( Self :: async_to_record_batch_reader (
251- self_, columns, row_filter,
269+ self_, columns, row_filter, indices ,
252270 ) )
253271 }
254272}
0 commit comments