@@ -27,9 +27,11 @@ use datafusion::datasource::physical_plan::{
 };
 use datafusion::datasource::source::DataSourceExec;
 use datafusion::execution::object_store::ObjectStoreUrl;
+use datafusion::execution::SendableRecordBatchStream;
 use datafusion::physical_expr::expressions::BinaryExpr;
 use datafusion::physical_expr::PhysicalExpr;
 use datafusion::physical_expr_adapter::PhysicalExprAdapterFactory;
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
 use datafusion::prelude::SessionContext;
 use datafusion::scalar::ScalarValue;
 use datafusion_comet_spark_expr::EvalMode;
@@ -149,22 +151,6 @@ pub(crate) fn init_datasource_exec(
 
     let data_source_exec = Arc::new(DataSourceExec::new(Arc::new(file_scan_config)));
 
-    // Debug: Execute the plan and print output RecordBatches
-    // let debug_ctx = SessionContext::default();
-    // let task_ctx = debug_ctx.task_ctx();
-    // if let Ok(stream) = data_source_exec.execute(0, task_ctx) {
-    //     let rt = tokio::runtime::Runtime::new().unwrap();
-    //     rt.block_on(async {
-    //         let batches: Vec<_> = stream.collect::<Vec<_>>().await;
-    //         let record_batches: Vec<_> = batches.into_iter().filter_map(|r| r.ok()).collect();
-    //         println!("=== DataSourceExec output RecordBatches ===");
-    //         if let Err(e) = print_batches(&record_batches) {
-    //             println!("Error printing batches: {:?}", e);
-    //         }
-    //         println!("=== End of DataSourceExec output ===");
-    //     });
-    // }
-
     Ok(data_source_exec)
 }
 
@@ -194,3 +180,25 @@ fn get_options(
 
     (table_parquet_options, spark_parquet_options)
 }
+
+/// Wraps a `SendableRecordBatchStream` to print each batch as it flows through.
+/// Returns a new `SendableRecordBatchStream` that yields the same batches.
+pub fn dbg_batch_stream(stream: SendableRecordBatchStream) -> SendableRecordBatchStream {
+    use futures::StreamExt;
+    let schema = stream.schema();
+    let printing_stream = stream.map(|batch_result| {
+        match &batch_result {
+            Ok(batch) => {
+                dbg!(batch, batch.schema());
+                for (col_idx, column) in batch.columns().iter().enumerate() {
+                    dbg!(col_idx, column, column.nulls());
+                }
+            }
+            Err(e) => {
+                println!("batch error: {:?}", e);
+            }
+        }
+        batch_result
+    });
+    Box::pin(RecordBatchStreamAdapter::new(schema, printing_stream))
+}
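
A minimal usage sketch (not part of the diff): the helper could be spliced in where the removed debug block used to run, wrapping the stream produced by `DataSourceExec` before it is handed downstream. The session/partition setup below mirrors the deleted commented-out code and is an assumption, not committed behavior.

    // Hypothetical debug hook, assuming the same setup as the removed block.
    let debug_ctx = SessionContext::default();
    let task_ctx = debug_ctx.task_ctx();
    if let Ok(stream) = data_source_exec.execute(0, task_ctx) {
        // Each RecordBatch (plus its schema, columns, and null buffers) is dbg!-printed as it is polled.
        let stream = dbg_batch_stream(stream);
        // ... consume `stream` as the caller normally would ...
    }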