|
17 | 17 |
|
18 | 18 | use std::sync::Arc; |
19 | 19 |
|
| 20 | +use crate::physical_optimizer::enforce_distribution::projection_exec_with_alias; |
| 21 | +use crate::physical_optimizer::sanity_checker::{ |
| 22 | + assert_sanity_check, assert_sanity_check_err, |
| 23 | +}; |
20 | 24 | use crate::physical_optimizer::test_utils::{ |
21 | 25 | aggregate_exec, bounded_window_exec, bounded_window_exec_non_set_monotonic, |
22 | 26 | bounded_window_exec_with_partition, check_integrity, coalesce_batches_exec, |
23 | 27 | coalesce_partitions_exec, create_test_schema, create_test_schema2, |
24 | 28 | create_test_schema3, create_test_schema4, filter_exec, global_limit_exec, |
25 | 29 | hash_join_exec, limit_exec, local_limit_exec, memory_exec, parquet_exec, |
26 | | - repartition_exec, sort_exec, sort_expr, sort_expr_options, sort_merge_join_exec, |
| 30 | + parquet_exec_with_stats, repartition_exec, schema, single_partitioned_aggregate, |
| 31 | + sort_exec, sort_expr, sort_expr_options, sort_merge_join_exec, |
27 | 32 | sort_preserving_merge_exec, sort_preserving_merge_exec_with_fetch, |
28 | 33 | spr_repartition_exec, stream_exec_ordered, union_exec, RequirementsTestExec, |
29 | 34 | }; |
@@ -2280,3 +2285,62 @@ async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()> |
2280 | 2285 | assert_optimized!(expected_input, expected_no_change, physical_plan, true); |
2281 | 2286 | Ok(()) |
2282 | 2287 | } |
| 2288 | + |
| 2289 | +#[tokio::test] |
| 2290 | +async fn test_preserve_needed_coalesce() -> Result<()> { |
| 2291 | + // Build the input handed to EnforceSorting: sort -> SinglePartitioned aggregate -> coalesce -> projection -> union of two parquet sources. |
| 2292 | + let plan = projection_exec_with_alias( |
| 2293 | + union_exec(vec![parquet_exec_with_stats(); 2]), |
| 2294 | + vec![ |
| 2295 | + ("a".to_string(), "a".to_string()), |
| 2296 | + ("b".to_string(), "value".to_string()), |
| 2297 | + ], |
| 2298 | + ); |
| 2299 | + let plan = Arc::new(CoalescePartitionsExec::new(plan)); |
| 2300 | + let schema = schema(); |
| 2301 | + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { |
| 2302 | + expr: col("a", &schema).unwrap(), |
| 2303 | + options: SortOptions::default(), |
| 2304 | + }]); |
| 2305 | + let plan: Arc<dyn ExecutionPlan> = |
| 2306 | + single_partitioned_aggregate(plan, vec![("a".to_string(), "a1".to_string())]); |
| 2307 | + let plan = sort_exec(sort_key, plan); |
| 2308 | + |
| 2309 | + // Pin the starting plan: the CoalescePartitionsExec sits directly below the aggregate. |
| 2310 | + assert_eq!( |
| 2311 | + get_plan_string(&plan), |
| 2312 | + vec![ |
| 2313 | + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", |
| 2314 | + " AggregateExec: mode=SinglePartitioned, gby=[a@0 as a1], aggr=[]", |
| 2315 | + " CoalescePartitionsExec", |
| 2316 | + " ProjectionExec: expr=[a@0 as a, b@1 as value]", |
| 2317 | + " UnionExec", |
| 2318 | + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", |
| 2319 | + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", |
| 2320 | + ], |
| 2321 | + ); |
| 2322 | + // Test: the starting plan is valid (assumes the `true` argument means the check is expected to pass; confirm against assert_sanity_check). |
| 2323 | + assert_sanity_check(&plan, true); |
| 2324 | + |
| 2325 | + // EnforceSorting is expected to remove the CoalescePartitionsExec and add a SortPreservingMergeExec (SPM) at the top, above a SortExec with preserve_partitioning=[true] and the aggregate. |
| 2326 | + let optimizer = EnforceSorting::new(); |
| 2327 | + let optimized = optimizer.optimize(plan, &Default::default())?; |
| 2328 | + assert_eq!( |
| 2329 | + get_plan_string(&optimized), |
| 2330 | + vec![ |
| 2331 | + "SortPreservingMergeExec: [a@0 ASC]", |
| 2332 | + " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", |
| 2333 | + " AggregateExec: mode=SinglePartitioned, gby=[a@0 as a1], aggr=[]", |
| 2334 | + " ProjectionExec: expr=[a@0 as a, b@1 as value]", |
| 2335 | + " UnionExec", |
| 2336 | + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", |
| 2337 | + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", |
| 2338 | + ], |
| 2339 | + ); |
| 2340 | + |
| 2341 | + // Bug pinned by this test: the optimized plan is now invalid; the sanity check reports an unmet HashPartitioned[[a@0]] distribution requirement against a child with UnknownPartitioning(2). |
| 2342 | + let err = "does not satisfy distribution requirements: HashPartitioned[[a@0]]). Child-0 output partitioning: UnknownPartitioning(2)"; |
| 2343 | + assert_sanity_check_err(&optimized, err); |
| 2344 | + |
| 2345 | + Ok(()) |
| 2346 | +} |
0 commit comments