Recreate group_values after spill merge to fix duplicate group keys (apache#20724)

gboucher90 · alamb · commit bf857444b6fd · 2026-03-12T18:02:52.000-04:00
When switching to streaming merge after spill, group_ordering is set to
Full but group_values is not recreated. The existing GroupValuesColumn<false>
uses vectorized_intern which can produce non-monotonic group indices,
violating GroupOrderingFull's assumption and causing duplicate groups
in the output.

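Fix: recreate group_values with the correct streaming mode after
updating group_ordering in update_merged_stream(). This is done only
when the group schema has more than one field (multi-column group by);
per the code comment, single-column grouping uses GroupValuesPrimitive,
which is always safe.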
diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs
@@ -1233,6 +1233,18 @@ impl GroupedHashAggregateStream {
             // on the grouping columns.
             self.group_ordering = GroupOrdering::Full(GroupOrderingFull::new());
 
+            // Recreate group_values to use streaming mode (GroupValuesColumn<true>
+            // with scalarized_intern) which preserves input row order, as required
+            // by GroupOrderingFull. This is only needed for multi-column group by,
+            // since single-column uses GroupValuesPrimitive which is always safe.
+            let group_schema = self
+                .spill_state
+                .merging_group_by
+                .group_schema(&self.spill_state.spill_schema)?;
+            if group_schema.fields().len() > 1 {
+                self.group_values = new_group_values(group_schema, &self.group_ordering)?;
+            }
+
             // Use `OutOfMemoryMode::ReportError` from this point on
             // to ensure we don't spill the spilled data to disk again.
             self.oom_mode = OutOfMemoryMode::ReportError;