Skip to content

Commit 7e24363

Browse files
authored
fix: dist planner rm col req when rm sort (GreptimeTeam#7512)
* aha! Signed-off-by: discord9 <discord9@163.com> * fix: rm col_req in pql sort Signed-off-by: discord9 <discord9@163.com> * ut Signed-off-by: discord9 <discord9@163.com> * docs Signed-off-by: discord9 <discord9@163.com> * typo Signed-off-by: discord9 <discord9@163.com> * more typo Signed-off-by: discord9 <discord9@163.com> --------- Signed-off-by: discord9 <discord9@163.com>
1 parent 3556eb4 commit 7e24363

File tree

2 files changed

+72
-6
lines changed

2 files changed

+72
-6
lines changed

src/query/src/dist_plan/analyzer.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -281,18 +281,18 @@ struct PlanRewriter {
281281
/// 2: Sort: t.pk1+t.pk2
282282
/// 3. Projection: t.number, t.pk1, t.pk2
283283
/// ```
284-
/// `Sort` will make a column requirement for `t.pk1` at level 2.
284+
/// `Sort` will make a column requirement for `t.pk1+t.pk2` at level 2.
285285
/// This makes the `Projection` at level 1 need to add a ref to `t.pk1` as well.
286286
/// So that the expanded plan will be
287287
/// ```ignore
288288
/// Projection: t.number
289-
/// MergeSort: t.pk1
289+
/// MergeSort: t.pk1+t.pk2
290290
/// MergeScan: remote_input=
291291
/// Projection: t.number, "t.pk1+t.pk2" <--- the original `Projection` at level 1 get added with `t.pk1+t.pk2`
292292
/// Sort: t.pk1+t.pk2
293293
/// Projection: t.number, t.pk1, t.pk2
294294
/// ```
295-
/// Making `MergeSort` can have `t.pk1` as input.
295+
/// This allows `MergeSort` to take `t.pk1+t.pk2` as input.
296296
/// Meanwhile `Projection` at level 3 doesn't need to add any new column because 3 > 2
297297
/// and col requirements at level 2 are not applicable to level 3.
298298
///
@@ -392,10 +392,11 @@ impl PlanRewriter {
392392
&& ext_b.node.name() == MergeSortLogicalPlan::name()
393393
{
394394
// revert last `ConditionalCommutative` result for Sort plan in this case.
395-
// `update_column_requirements` left unchanged because Sort won't generate
396-
// new columns or remove existing columns.
395+
// also need to remove any column requirements made by the Sort Plan
396+
// as it may refer to columns that (rightfully) no longer exist later, e.g. removed by an aggregate or projection
397397
self.stage.pop();
398398
self.expand_on_next_part_cond_trans_commutative = false;
399+
self.column_requirements.clear();
399400
}
400401
}
401402
Commutativity::PartialCommutative => {
@@ -680,6 +681,10 @@ struct EnforceDistRequirementRewriter {
680681

681682
impl EnforceDistRequirementRewriter {
682683
fn new(column_requirements: Vec<(HashSet<Column>, usize)>, cur_level: usize) -> Self {
684+
debug!(
685+
"Create EnforceDistRequirementRewriter with column_requirements: {:?} at cur_level: {}",
686+
column_requirements, cur_level
687+
);
683688
Self {
684689
column_requirements,
685690
cur_level,
@@ -733,7 +738,7 @@ impl EnforceDistRequirementRewriter {
733738
.filter(|a| !a.is_empty())
734739
else {
735740
return Err(datafusion_common::DataFusionError::Internal(format!(
736-
"EnforceDistRequirementRewriter: no alias found for required column {original_col} in child plan {child} from original plan {original}",
741+
"EnforceDistRequirementRewriter: no alias found for required column {original_col} at level {level} in current node's child plan: \n{child} from original plan: \n{original}",
737742
)));
738743
};
739744

src/query/src/dist_plan/analyzer/test.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,67 @@ fn expand_step_aggr_proj() {
777777
assert_eq!(expected, result.to_string());
778778
}
779779

780+
/// Make sure that the `SeriesDivide` special handling correctly cleans up column requirements from its previous sort
781+
#[test]
782+
fn expand_complex_col_req_sort_pql() {
783+
// use logging for better debugging
784+
init_default_ut_logging();
785+
let test_table = TestTable::table_with_name(0, "t".to_string());
786+
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
787+
DfTableProviderAdapter::new(test_table),
788+
)));
789+
790+
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source.clone(), None, vec![])
791+
.unwrap()
792+
.sort(vec![
793+
col("pk1").sort(true, false),
794+
col("pk2").sort(true, false),
795+
col("pk3").sort(true, false), // make some col req here
796+
])
797+
.unwrap()
798+
.build()
799+
.unwrap();
800+
let plan = SeriesDivide::new(
801+
vec!["pk1".to_string(), "pk2".to_string(), "pk3".to_string()],
802+
"ts".to_string(),
803+
plan,
804+
);
805+
let plan = LogicalPlan::Extension(datafusion_expr::Extension {
806+
node: Arc::new(plan),
807+
});
808+
809+
let plan = LogicalPlanBuilder::from(plan)
810+
.aggregate(vec![col("pk1"), col("pk2")], vec![min(col("number"))])
811+
.unwrap()
812+
.sort(vec![
813+
col("pk1").sort(true, false),
814+
col("pk2").sort(true, false),
815+
])
816+
.unwrap()
817+
.project(vec![col("pk1"), col("pk2")])
818+
.unwrap()
819+
.build()
820+
.unwrap();
821+
822+
let config = ConfigOptions::default();
823+
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
824+
825+
let expected = [
826+
"Projection: t.pk1, t.pk2",
827+
" MergeSort: t.pk1 ASC NULLS LAST, t.pk2 ASC NULLS LAST",
828+
" MergeScan [is_placeholder=false, remote_input=[",
829+
"Projection: t.pk1, t.pk2",
830+
" Sort: t.pk1 ASC NULLS LAST, t.pk2 ASC NULLS LAST",
831+
" Aggregate: groupBy=[[t.pk1, t.pk2]], aggr=[[min(t.number)]]",
832+
r#" PromSeriesDivide: tags=["pk1", "pk2", "pk3"]"#,
833+
" Sort: t.pk1 ASC NULLS LAST, t.pk2 ASC NULLS LAST, t.pk3 ASC NULLS LAST",
834+
" TableScan: t",
835+
"]]",
836+
]
837+
.join("\n");
838+
assert_eq!(expected, result.to_string());
839+
}
840+
780841
/// should only expand `Sort`, notice `Sort` before `Aggregate` usually can and
781842
/// will be optimized out, and dist planner shouldn't handle that case, but
782843
/// for now, still handle that be expanding the `Sort` node

0 commit comments

Comments
 (0)