Skip to content

Commit 1d39e45

Browse files
committed
test: Add hash_agg_aggregation_strategy_with_nongrouped_single_value_columns_in_sort_key test
1 parent 9e663dc commit 1d39e45

File tree

1 file changed

+46
-1
lines changed

1 file changed

+46
-1
lines changed

datafusion/src/physical_plan/planner.rs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1758,7 +1758,8 @@ fn tuple_err<T, R>(value: (Result<T>, Result<R>)) -> Result<(T, R)> {
17581758
#[cfg(test)]
17591759
mod tests {
17601760
use super::*;
1761-
use crate::logical_plan::{DFField, DFSchema, DFSchemaRef};
1761+
use crate::logical_plan::{and, DFField, DFSchema, DFSchemaRef};
1762+
use crate::physical_plan::OptimizerHints;
17621763
use crate::physical_plan::{csv::CsvReadOptions, expressions, Partitioning};
17631764
use crate::scalar::ScalarValue;
17641765
use crate::{
@@ -2041,6 +2042,50 @@ mod tests {
20412042
Ok(())
20422043
}
20432044

2045+
/// Checks that `input_sorted_by_group_key` reports the input as sorted by the group key
/// when the input's sort key contains a single-value column that is not part of the
/// group key.
///
/// The input plan is a CSV scan, filtered on equality for "c4" and "c8" and sorted on
/// "c1".."c8". The test first asserts the optimizer hints the plan exposes, then asserts
/// that a group key of "c1".."c5" is recognized as sorted with sort order `[0, 1, 2, 3, 4]`.
#[test]
2046+
fn hash_agg_aggregation_strategy_with_nongrouped_single_value_columns_in_sort_key() -> Result<()> {
2047+
let testdata = crate::test_util::arrow_test_data();
2048+
let path = format!("{}/csv/aggregate_test_100.csv", testdata);
2049+
2050+
let options = CsvReadOptions::new().schema_infer_max_records(100);
2051+
2052+
// Local shorthand for building a sort expression on a named column.
// NOTE(review): the two `true` flags are presumably (ascending, nulls_first) -- confirm against `Expr::sort`.
fn sort(column_name: &str) -> Expr {
2053+
col(column_name).sort(true, true)
2054+
}
2055+
2056+
// Instead of creating a mock ExecutionPlan, we build a real input plan (scan -> filter -> sort)
// that produces the desired output_hints(). The equality filters on "c4" and "c8" line up with
// the single_value_columns asserted below (indices 3 and 7).
let logical_plan = LogicalPlanBuilder::scan_csv(path, options, None)?
2057+
.filter(and(col("c4").eq(lit("value_a")), col("c8").eq(lit("value_b"))))?
2058+
.sort(vec![sort("c1"), sort("c2"), sort("c3"), sort("c4"), sort("c5"), sort("c6"), sort("c7"), sort("c8")])?
2059+
.build()?;
2060+
2061+
let execution_plan = plan(&logical_plan)?;
2062+
2063+
// Sanity-check the hints first. Note that both single_value_columns (3 and 7) are part of the
// sort key, but only column 3 ("c4") will be part of the group key built below.
let hints: OptimizerHints = execution_plan.output_hints();
2064+
assert_eq!(hints.sort_order, Some(vec![0, 1, 2, 3, 4, 5, 6, 7]));
2065+
assert_eq!(hints.single_value_columns, vec![3, 7]);
2066+
2067+
// Now make a group_key that overlaps one single_value_column (column 3, "c4"). The other
// single-value column, 7 ("c8"), is outside the group key and is separated from it in the
// sort key by columns 5 and 6 ("c6" and "c7" respectively).
let group_key = vec![col("c1"), col("c2"), col("c3"), col("c4"), col("c5")];
2068+
let mut ctx_state = make_ctx_state();
2069+
ctx_state.config.concurrency = 4;
2070+
let planner = DefaultPhysicalPlanner::default();
2071+
// Convert each logical group expression into a physical expression paired with a name;
// the name is left empty here.
let mut physical_group_key = Vec::new();
2072+
for expr in group_key {
2073+
let phys_expr = planner.create_physical_expr(&expr, &logical_plan.schema(), &execution_plan.schema(), &ctx_state)?;
2074+
physical_group_key.push((phys_expr, "".to_owned()));
2075+
}
2076+
2077+
// `sort_order` is an out-parameter filled in by `input_sorted_by_group_key`.
let mut sort_order = Vec::<usize>::new();
2078+
let is_sorted: bool = input_sorted_by_group_key(execution_plan.as_ref(), &physical_group_key, &mut sort_order);
2079+
assert!(is_sorted);
2080+
// Expected order is the five group-key columns "c1".."c5" by index.
assert_eq!(sort_order, vec![0, 1, 2, 3, 4]);
2081+
2082+
Ok(())
2083+
}
2088+
20442089
#[test]
20452090
fn test_explain() {
20462091
let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);

0 commit comments

Comments
 (0)