@@ -53,13 +53,25 @@ impl Iterator for PyArrowBatchesAdapter {
5353
5454    fn  next ( & mut  self )  -> Option < Self :: Item >  { 
5555        Python :: with_gil ( |py| { 
56+             println ! ( "getting next pyarrow batch" ) ; 
5657            let  mut  batches = self . batches . clone ( ) . into_bound ( py) ; 
57-             Some ( 
58-                 batches
59-                     . next ( ) ?
60-                     . and_then ( |batch| Ok ( batch. extract :: < PyArrowType < _ > > ( ) ?. 0 ) ) 
61-                     . map_err ( |err| ArrowError :: ExternalError ( Box :: new ( err) ) ) , 
62-             ) 
58+             
59+             let  next_batch = batches. next ( ) . expect ( "no next batch" ) . expect ( "failed to get next batch" ) ; 
60+ 
61+             // NOTE: This is where the failure actually occurs. 
62+             // It occurs because `from_pyarrow_bound` uses the default `RecordBatchOptions` which does *not* allow a batch with no columns. 
63+             // See https://github.com/apache/arrow-rs/pull/1552 for more details. 
64+             let  extracted = next_batch. extract :: < PyArrowType < _ > > ( ) . expect ( "failed to extract batch" ) ; 
65+             Some ( Ok ( extracted. 0 ) )    
66+ 
67+             // Some(Ok( 
68+             //     batches 
69+             //         .next() 
70+             //         .unwrap() 
71+             //         .and_then(|batch| Ok(batch.extract::<PyArrowType<_>>().unwrap().0)) 
72+             //         .unwrap() 
73+             //         // .map_err(|err| ArrowError::ExternalError(Box::new(err))), 
74+             // )) 
6375        } ) 
6476    } 
6577} 
@@ -83,6 +95,7 @@ impl DatasetExec {
8395        projection :  Option < Vec < usize > > , 
8496        filters :  & [ Expr ] , 
8597    )  -> Result < Self ,  DataFusionError >  { 
98+         println ! ( "initiating new DatasetExec" ) ; 
8699        let  columns:  Option < Result < Vec < String > ,  DataFusionError > >  = projection. map ( |p| { 
87100            p. iter ( ) 
88101                . map ( |index| { 
@@ -138,7 +151,7 @@ impl DatasetExec {
138151            Partitioning :: UnknownPartitioning ( fragments. len ( ) ) , 
139152            ExecutionMode :: Bounded , 
140153        ) ; 
141- 
154+          println ! ( "intiating new DatasetExec: done" ) ; 
142155        Ok ( DatasetExec  { 
143156            dataset :  dataset. clone ( ) . unbind ( ) , 
144157            schema, 
@@ -184,45 +197,58 @@ impl ExecutionPlan for DatasetExec {
184197        partition :  usize , 
185198        context :  Arc < TaskContext > , 
186199    )  -> DFResult < SendableRecordBatchStream >  { 
200+         println ! ( "executing DatasetExec" ) ; 
187201        let  batch_size = context. session_config ( ) . batch_size ( ) ; 
188202        Python :: with_gil ( |py| { 
189203            let  dataset = self . dataset . bind ( py) ; 
190204            let  fragments = self . fragments . bind ( py) ; 
191205            let  fragment = fragments
192206                . get_item ( partition) 
193-                 . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?; 
207+                 . unwrap ( ) ; 
208+                 // .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; 
194209
195210            // We need to pass the dataset schema to unify the fragment and dataset schema per PyArrow docs 
196211            let  dataset_schema = dataset
197212                . getattr ( "schema" ) 
198-                 . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?; 
213+                 . unwrap ( ) ; 
214+                 // .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; 
215+             println ! ( "dataset_schema: {:?}" ,  dataset_schema) ; 
199216            let  kwargs = PyDict :: new_bound ( py) ; 
200217            kwargs
201-                 . set_item ( "columns" ,  self . columns . clone ( ) ) 
202-                 . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?; 
218+                 . set_item ( "columns" ,  self . columns . clone ( ) ) . unwrap ( ) ; 
219+                 //  .map_err(|err| InnerDataFusionError::External(Box::new(err)))?;
203220            kwargs
204221                . set_item ( 
205222                    "filter" , 
206223                    self . filter_expr . as_ref ( ) . map ( |expr| expr. clone_ref ( py) ) , 
207-                 ) 
208-                 . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?; 
224+                 ) . unwrap ( ) ; 
225+                 //  .map_err(|err| InnerDataFusionError::External(Box::new(err)))?;
209226            kwargs
210227                . set_item ( "batch_size" ,  batch_size) 
211-                 . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?; 
228+                 . unwrap ( ) ; 
229+                 // .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; 
212230            let  scanner = fragment
213231                . call_method ( "scanner" ,  ( dataset_schema, ) ,  Some ( & kwargs) ) 
214-                 . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?; 
232+                 . unwrap ( ) ; 
233+                 // .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; 
215234            let  schema:  SchemaRef  = Arc :: new ( 
216235                scanner
217236                    . getattr ( "projected_schema" ) 
218-                     . and_then ( |schema| Ok ( schema. extract :: < PyArrowType < _ > > ( ) ?. 0 ) ) 
219-                     . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?, 
237+                     . and_then ( |schema| { 
238+                         let  pyarrow_schema = schema. extract :: < PyArrowType < _ > > ( ) . unwrap ( ) . 0 ; 
239+                         println ! ( "pyarrow_schema: {:?}" ,  pyarrow_schema) ; 
240+                         Ok ( pyarrow_schema) 
241+                     } ) 
242+                     . unwrap ( ) , 
243+                     // .map_err(|err| InnerDataFusionError::External(Box::new(err)))?, 
220244            ) ; 
221245            let  record_batches:  Bound < ' _ ,  PyIterator >  = scanner
222246                . call_method0 ( "to_batches" ) 
223-                 . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?
247+                 . unwrap ( ) 
248+                 // .map_err(|err| InnerDataFusionError::External(Box::new(err)))? 
224249                . iter ( ) 
225-                 . map_err ( |err| InnerDataFusionError :: External ( Box :: new ( err) ) ) ?; 
250+                 . unwrap ( ) ; 
251+                 // .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; 
226252
227253            let  record_batches = PyArrowBatchesAdapter  { 
228254                batches :  record_batches. into ( ) , 
0 commit comments