@@ -139,6 +139,14 @@ impl ExecutionPlan for IcebergCommitExec {
139
139
vec ! [ & self . input]
140
140
}
141
141
142
+ fn required_input_distribution ( & self ) -> Vec < datafusion:: physical_plan:: Distribution > {
143
+ vec ! [ datafusion:: physical_plan:: Distribution :: SinglePartition ; self . children( ) . len( ) ]
144
+ }
145
+
146
+ fn benefits_from_input_partitioning ( & self ) -> Vec < bool > {
147
+ vec ! [ false ]
148
+ }
149
+
142
150
fn with_new_children (
143
151
self : Arc < Self > ,
144
152
children : Vec < Arc < dyn ExecutionPlan > > ,
@@ -265,14 +273,16 @@ mod tests {
265
273
use std:: fmt;
266
274
use std:: sync:: Arc ;
267
275
268
- use datafusion:: arrow:: array:: { ArrayRef , RecordBatch , StringArray } ;
276
+ use datafusion:: arrow:: array:: { ArrayRef , Int32Array , RecordBatch , StringArray , UInt64Array } ;
269
277
use datafusion:: arrow:: datatypes:: { DataType , Field , Schema as ArrowSchema } ;
278
+ use datafusion:: datasource:: MemTable ;
270
279
use datafusion:: execution:: context:: TaskContext ;
271
280
use datafusion:: physical_expr:: { EquivalenceProperties , Partitioning } ;
272
281
use datafusion:: physical_plan:: common:: collect;
273
282
use datafusion:: physical_plan:: execution_plan:: Boundedness ;
274
283
use datafusion:: physical_plan:: stream:: RecordBatchStreamAdapter ;
275
284
use datafusion:: physical_plan:: { DisplayAs , DisplayFormatType , ExecutionPlan , PlanProperties } ;
285
+ use datafusion:: prelude:: * ;
276
286
use futures:: StreamExt ;
277
287
use iceberg:: memory:: { MEMORY_CATALOG_WAREHOUSE , MemoryCatalogBuilder } ;
278
288
use iceberg:: spec:: {
@@ -283,6 +293,7 @@ mod tests {
283
293
284
294
use super :: * ;
285
295
use crate :: physical_plan:: DATA_FILES_COL_NAME ;
296
+ use crate :: table:: IcebergTableProvider ;
286
297
287
298
// A mock execution plan that returns record batches with serialized data files
288
299
#[ derive( Debug ) ]
@@ -513,4 +524,99 @@ mod tests {
513
524
514
525
Ok ( ( ) )
515
526
}
527
+
528
+ #[ tokio:: test]
529
+ async fn test_datafusion_execution_partitioned_source ( ) -> Result < ( ) , Box < dyn std:: error:: Error > >
530
+ {
531
+ let catalog = Arc :: new (
532
+ MemoryCatalogBuilder :: default ( )
533
+ . load (
534
+ "memory" ,
535
+ HashMap :: from ( [ (
536
+ MEMORY_CATALOG_WAREHOUSE . to_string ( ) ,
537
+ "memory://root" . to_string ( ) ,
538
+ ) ] ) ,
539
+ )
540
+ . await ?,
541
+ ) ;
542
+
543
+ let namespace = NamespaceIdent :: new ( "test_namespace" . to_string ( ) ) ;
544
+ catalog. create_namespace ( & namespace, HashMap :: new ( ) ) . await ?;
545
+
546
+ let schema = Schema :: builder ( )
547
+ . with_schema_id ( 1 )
548
+ . with_fields ( vec ! [
549
+ NestedField :: required( 1 , "id" , Type :: Primitive ( PrimitiveType :: Int ) ) . into( ) ,
550
+ NestedField :: required( 2 , "name" , Type :: Primitive ( PrimitiveType :: String ) ) . into( ) ,
551
+ ] )
552
+ . build ( ) ?;
553
+
554
+ let table_name = "test_table" ;
555
+ let table_creation = TableCreation :: builder ( )
556
+ . name ( table_name. to_string ( ) )
557
+ . schema ( schema)
558
+ . location ( "memory://root/test_table" . to_string ( ) )
559
+ . properties ( HashMap :: new ( ) )
560
+ . build ( ) ;
561
+ let _ = catalog. create_table ( & namespace, table_creation) . await ?;
562
+
563
+ let arrow_schema = Arc :: new ( ArrowSchema :: new ( vec ! [
564
+ Field :: new( "id" , DataType :: Int32 , false ) ,
565
+ Field :: new( "name" , DataType :: Utf8 , false ) ,
566
+ ] ) ) ;
567
+
568
+ let batches: Vec < RecordBatch > = ( 1 ..4 )
569
+ . map ( |idx| {
570
+ RecordBatch :: try_new ( arrow_schema. clone ( ) , vec ! [
571
+ Arc :: new( Int32Array :: from( vec![ idx] ) ) as ArrayRef ,
572
+ Arc :: new( StringArray :: from( vec![ format!( "Name{idx}" ) ] ) ) as ArrayRef ,
573
+ ] )
574
+ } )
575
+ . collect :: < Result < _ , _ > > ( ) ?;
576
+
577
+ // Create DataFusion context with specific partition configuration
578
+ let mut config = SessionConfig :: new ( ) ;
579
+ config = config. set_usize ( "datafusion.execution.target_partitions" , 8 ) ;
580
+ let ctx = SessionContext :: new_with_config ( config) ;
581
+
582
+ // Create multiple partitions - each batch becomes a separate partition
583
+ let partitions: Vec < Vec < RecordBatch > > =
584
+ batches. into_iter ( ) . map ( |batch| vec ! [ batch] ) . collect ( ) ;
585
+ let source_table = Arc :: new ( MemTable :: try_new ( Arc :: clone ( & arrow_schema) , partitions) ?) ;
586
+ ctx. register_table ( "source_table" , source_table) ?;
587
+
588
+ let table_ident = TableIdent :: from_strs ( vec ! [ "test_namespace" , "test_table" ] ) ?;
589
+ let iceberg_table_provider =
590
+ IcebergTableProvider :: try_new ( catalog. clone ( ) , table_ident) . await ?;
591
+ ctx. register_table ( "iceberg_table" , Arc :: new ( iceberg_table_provider) ) ?;
592
+
593
+ let insert_plan = ctx
594
+ . sql ( "INSERT INTO iceberg_table SELECT * FROM source_table" )
595
+ . await ?;
596
+
597
+ let physical_plan = insert_plan. create_physical_plan ( ) . await ?;
598
+
599
+ let actual_plan = format ! (
600
+ "{}" ,
601
+ datafusion:: physical_plan:: displayable( physical_plan. as_ref( ) ) . indent( false )
602
+ ) ;
603
+
604
+ println ! ( "Physical plan:\n {actual_plan}" ) ;
605
+
606
+ let expected_plan = "\
607
+ IcebergCommitExec: table=test_namespace.test_table
608
+ CoalescePartitionsExec
609
+ IcebergWriteExec: table=test_namespace.test_table
610
+ DataSourceExec: partitions=3, partition_sizes=[1, 1, 1]" ;
611
+
612
+ assert_eq ! (
613
+ actual_plan. trim( ) ,
614
+ expected_plan. trim( ) ,
615
+ "Physical plan does not match expected\n \n Expected:\n {}\n \n Actual:\n {}" ,
616
+ expected_plan. trim( ) ,
617
+ actual_plan. trim( )
618
+ ) ;
619
+
620
+ Ok ( ( ) )
621
+ }
516
622
}
0 commit comments