apache
diff --git a/datafusion/core/benches/topk_aggregate.rs b/datafusion/core/benches/topk_aggregate.rs
Lines changed: 85 additions & 2 deletions
diff --git a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs
Lines changed: 86 additions & 0 deletions
diff --git a/datafusion/physical-optimizer/src/topk_aggregation.rs b/datafusion/physical-optimizer/src/topk_aggregation.rs
Lines changed: 4 additions & 17 deletions
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
Lines changed: 34 additions & 1 deletion
@@ -28,6 +28,8 @@ use std::hint::black_box;
 use std::sync::Arc;
 use tokio::runtime::Runtime;
 
+const LIMIT: usize = 10; // top-k row limit used by the benchmark queries and their row-count checks
+
 async fn create_context(
     partition_cnt: i32,
     sample_cnt: i32,
@@ -52,6 +54,11 @@ fn run(rt: &Runtime, ctx: SessionContext, limit: usize, use_topk: bool, asc: boo
     black_box(rt.block_on(async { aggregate(ctx, limit, use_topk, asc).await })).unwrap();
 }
 
+fn run_string(rt: &Runtime, ctx: SessionContext, limit: usize, use_topk: bool) {
+    black_box(rt.block_on(async { aggregate_string(ctx, limit, use_topk).await }))
+        .unwrap();
+}
+
 async fn aggregate(
     ctx: SessionContext,
     limit: usize,
@@ -72,7 +79,7 @@ async fn aggregate(
     let batches = collect(plan, ctx.task_ctx()).await?;
     assert_eq!(batches.len(), 1);
     let batch = batches.first().unwrap();
-    assert_eq!(batch.num_rows(), 10);
+    assert_eq!(batch.num_rows(), LIMIT);
 
     let actual = format!("{}", pretty_format_batches(&batches)?).to_lowercase();
     let expected_asc = r#"
@@ -99,9 +106,36 @@ async fn aggregate(
     Ok(())
 }
 
+/// Benchmark body for string aggregates (`trace_id` as Utf8 or Utf8View) with TopK.
+/// Groups by `timestamp_ms`, orders by `max(trace_id)` descending and keeps `limit` rows.
+/// Panics unless `lim=[limit]` shows in the plan iff `use_topk`, and `limit` rows return.
+async fn aggregate_string(
+    ctx: SessionContext,
+    limit: usize,
+    use_topk: bool,
+) -> Result<()> {
+    let sql = format!(
+        "select max(trace_id) from traces group by timestamp_ms order by max(trace_id) desc limit {limit};"
+    );
+    let df = ctx.sql(sql.as_str()).await?;
+    let plan = df.create_physical_plan().await?;
+    let actual_phys_plan = displayable(plan.as_ref()).indent(true).to_string();
+    assert_eq!(
+        actual_phys_plan.contains(&format!("lim=[{limit}]")),
+        use_topk
+    );
+
+    let batches = collect(plan, ctx.task_ctx()).await?;
+    assert_eq!(batches.len(), 1);
+    let batch = batches.first().unwrap();
+    assert_eq!(batch.num_rows(), limit);
+
+    Ok(())
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
     let rt = Runtime::new().unwrap();
-    let limit = 10;
+    let limit = LIMIT;
     let partitions = 10;
     let samples = 1_000_000;
 
@@ -170,6 +204,55 @@ fn criterion_benchmark(c: &mut Criterion) {
         .as_str(),
         |b| b.iter(|| run(&rt, ctx.clone(), limit, true, true)),
     );
+
+    // String aggregate benchmarks - grouping by timestamp, aggregating string column
+    let ctx = rt
+        .block_on(create_context(partitions, samples, false, true, false))
+        .unwrap();
+    c.bench_function(
+        format!(
+            "top k={limit} string aggregate {} time-series rows [Utf8]",
+            partitions * samples
+        )
+        .as_str(),
+        |b| b.iter(|| run_string(&rt, ctx.clone(), limit, true)),
+    );
+
+    let ctx = rt
+        .block_on(create_context(partitions, samples, true, true, false))
+        .unwrap();
+    c.bench_function(
+        format!(
+            "top k={limit} string aggregate {} worst-case rows [Utf8]",
+            partitions * samples
+        )
+        .as_str(),
+        |b| b.iter(|| run_string(&rt, ctx.clone(), limit, true)),
+    );
+
+    let ctx = rt
+        .block_on(create_context(partitions, samples, false, true, true))
+        .unwrap();
+    c.bench_function(
+        format!(
+            "top k={limit} string aggregate {} time-series rows [Utf8View]",
+            partitions * samples
+        )
+        .as_str(),
+        |b| b.iter(|| run_string(&rt, ctx.clone(), limit, true)),
+    );
+
+    let ctx = rt
+        .block_on(create_context(partitions, samples, true, true, true))
+        .unwrap();
+    c.bench_function(
+        format!(
+            "top k={limit} string aggregate {} worst-case rows [Utf8View]",
+            partitions * samples
+        )
+        .as_str(),
+        |b| b.iter(|| run_string(&rt, ctx.clone(), limit, true)),
+    );
 }
 
 criterion_group!(benches, criterion_benchmark);
 
@@ -20,11 +20,15 @@ use std::sync::Arc;
 use crate::physical_optimizer::test_utils::TestAggregate;
 
 use arrow::array::Int32Array;
+use arrow::array::{Int64Array, StringArray};
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::record_batch::RecordBatch;
+use datafusion::datasource::memory::MemTable;
 use datafusion::datasource::memory::MemorySourceConfig;
 use datafusion::datasource::source::DataSourceExec;
+use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_common::Result;
+use datafusion_common::assert_batches_eq;
 use datafusion_common::cast::as_int64_array;
 use datafusion_common::config::ConfigOptions;
 use datafusion_execution::TaskContext;
@@ -38,6 +42,7 @@ use datafusion_physical_plan::aggregates::AggregateMode;
 use datafusion_physical_plan::aggregates::PhysicalGroupBy;
 use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec;
 use datafusion_physical_plan::common;
+use datafusion_physical_plan::displayable;
 use datafusion_physical_plan::filter::FilterExec;
 use datafusion_physical_plan::projection::ProjectionExec;
 
@@ -316,3 +321,84 @@ async fn test_count_with_nulls_inexact_stat() -> Result<()> {
 
     Ok(())
 }
+
+/// Exercises `GROUP BY ... ORDER BY <min/max> LIMIT n` with a UTF-8 grouping key
+/// while `optimizer.enable_topk_aggregation` is switched on.
+///
+/// Two queries run against a three-row in-memory table `t`:
+/// 1. `max(val_num)` (Int64 value) - described here as the TopK-supported path;
+///    only the result batch is asserted, the plan itself is not inspected.
+/// 2. `max(val_str)` (Utf8 value) - described here as the unsupported path that
+///    must fall back to regular aggregation without panicking.
+///
+/// NOTE(review): `topk_types_supported` (added in the same change) documents
+/// UTF-8 as a supported value type, which seems at odds with case 2 - confirm.
+/// NOTE(review): the plan check below looks for a stream type name; the TopK
+/// benchmark detects the optimization via `lim=[..]` instead - TODO confirm.
+#[tokio::test]
+async fn utf8_grouping_min_max_limit_fallbacks() -> Result<()> {
+    let mut config = SessionConfig::new();
+    config.options_mut().optimizer.enable_topk_aggregation = true;
+    let ctx = SessionContext::new_with_config(config);
+
+    let batch = RecordBatch::try_new(
+        Arc::new(Schema::new(vec![
+            Field::new("g", DataType::Utf8, false),
+            Field::new("val_str", DataType::Utf8, false),
+            Field::new("val_num", DataType::Int64, false),
+        ])),
+        vec![
+            Arc::new(StringArray::from(vec!["a", "b", "a"])),
+            Arc::new(StringArray::from(vec!["alpha", "bravo", "charlie"])),
+            Arc::new(Int64Array::from(vec![1, 2, 3])),
+        ],
+    )?;
+    let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
+    ctx.register_table("t", Arc::new(table))?;
+
+    // Supported path: numeric min/max with UTF-8 grouping should still use TopK aggregation
+    // and return correct results.
+    let supported_df = ctx
+        .sql("SELECT g, max(val_num) AS m FROM t GROUP BY g ORDER BY m DESC LIMIT 1")
+        .await?;
+    let supported_batches = supported_df.collect().await?;
+    assert_batches_eq!(
+        &[
+            "+---+---+",
+            "| g | m |",
+            "+---+---+",
+            "| a | 3 |",
+            "+---+---+"
+        ],
+        &supported_batches
+    );
+
+    // Unsupported TopK value type: string min/max should fall back without panicking.
+    let unsupported_df = ctx
+        .sql("SELECT g, max(val_str) AS s FROM t GROUP BY g ORDER BY s DESC LIMIT 1")
+        .await?;
+    let unsupported_plan = unsupported_df.clone().create_physical_plan().await?;
+    let unsupported_batches = unsupported_df.collect().await?;
+
+    // Check that the rendered plan text does not mention the TopK-specific stream type.
+    let plan_display = displayable(unsupported_plan.as_ref())
+        .indent(true)
+        .to_string();
+    assert!(
+        !plan_display.contains("GroupedTopKAggregateStream"),
+        "Unsupported UTF-8 aggregate value should not use TopK: {plan_display}"
+    );
+
+    assert_batches_eq!(
+        &[
+            "+---+---------+",
+            "| g | s       |",
+            "+---+---------+",
+            "| a | charlie |",
+            "+---+---------+"
+        ],
+        &unsupported_batches
+    );
+
+    Ok(())
+}
@@ -20,13 +20,12 @@
 use std::sync::Arc;
 
 use crate::PhysicalOptimizerRule;
-use arrow::datatypes::DataType;
 use datafusion_common::Result;
 use datafusion_common::config::ConfigOptions;
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion_physical_expr::expressions::Column;
 use datafusion_physical_plan::ExecutionPlan;
-use datafusion_physical_plan::aggregates::AggregateExec;
+use datafusion_physical_plan::aggregates::{AggregateExec, topk_types_supported};
 use datafusion_physical_plan::execution_plan::CardinalityEffect;
 use datafusion_physical_plan::projection::ProjectionExec;
 use datafusion_physical_plan::sorts::sort::SortExec;
@@ -55,11 +54,8 @@ impl TopKAggregation {
         }
         let group_key = aggr.group_expr().expr().iter().exactly_one().ok()?;
         let kt = group_key.0.data_type(&aggr.input().schema()).ok()?;
-        if !kt.is_primitive()
-            && kt != DataType::Utf8
-            && kt != DataType::Utf8View
-            && kt != DataType::LargeUtf8
-        {
+        let vt = field.data_type();
+        if !topk_types_supported(&kt, vt) {
             return None;
         }
         if aggr.filter_expr().iter().any(|e| e.is_some()) {
@@ -72,16 +68,7 @@ impl TopKAggregation {
         }
 
         // We found what we want: clone, copy the limit down, and return modified node
-        let new_aggr = AggregateExec::try_new(
-            *aggr.mode(),
-            aggr.group_expr().clone(),
-            aggr.aggr_expr().to_vec(),
-            aggr.filter_expr().to_vec(),
-            Arc::clone(aggr.input()),
-            aggr.input_schema(),
-        )
-        .expect("Unable to copy Aggregate!")
-        .with_limit(Some(limit));
+        let new_aggr = aggr.with_new_limit(Some(limit));
         Some(Arc::new(new_aggr))
     }
 
 
@@ -41,7 +41,7 @@ use parking_lot::Mutex;
 use std::collections::HashSet;
 
 use arrow::array::{ArrayRef, UInt8Array, UInt16Array, UInt32Array, UInt64Array};
-use arrow::datatypes::{Field, Schema, SchemaRef};
+use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use arrow::record_batch::RecordBatch;
 use arrow_schema::FieldRef;
 use datafusion_common::stats::Precision;
@@ -64,6 +64,8 @@ use datafusion_physical_expr_common::sort_expr::{
 use datafusion_expr::utils::AggregateOrderSensitivity;
 use datafusion_physical_expr_common::utils::evaluate_expressions_to_arrays;
 use itertools::Itertools;
+use topk::hash_table::is_supported_hash_key_type;
+use topk::heap::is_supported_heap_type;
 
 pub mod group_values;
 mod no_grouping;
@@ -72,6 +74,17 @@ mod row_hash;
 mod topk;
 mod topk_stream;
 
+/// Returns true if TopK aggregation data structures support the provided key and value types.
+///
+/// `key_type` is the type of the grouping key and `value_type` the type of the min/max
+/// aggregate value. The key is checked with `is_supported_hash_key_type` (TopK hash
+/// table) and the value with `is_supported_heap_type` (TopK heap); both must accept.
+/// Those helpers are the authoritative list of types, which is intended to cover
+/// Arrow primitives and UTF-8 strings (`Utf8`, `LargeUtf8`, `Utf8View`).
+pub fn topk_types_supported(key_type: &DataType, value_type: &DataType) -> bool {
+    is_supported_hash_key_type(key_type) && is_supported_heap_type(value_type)
+}
+
 /// Hard-coded seed for aggregations to ensure hash values differ from `RepartitionExec`, avoiding collisions.
 const AGGREGATION_HASH_SEED: ahash::RandomState =
     ahash::RandomState::with_seeds('A' as u64, 'G' as u64, 'G' as u64, 'R' as u64);
@@ -553,6 +566,26 @@ impl AggregateExec {
         }
     }
 
+    /// Returns a copy of this exec whose limit hint is `limit`; execution metrics start fresh.
+    pub fn with_new_limit(&self, limit: Option<usize>) -> Self {
+        Self {
+            mode: self.mode,
+            group_by: self.group_by.clone(),
+            aggr_expr: self.aggr_expr.clone(),
+            filter_expr: self.filter_expr.clone(),
+            input: Arc::clone(&self.input),
+            input_schema: Arc::clone(&self.input_schema),
+            schema: Arc::clone(&self.schema),
+            cache: self.cache.clone(),
+            input_order_mode: self.input_order_mode.clone(),
+            required_input_ordering: self.required_input_ordering.clone(),
+            dynamic_filter: self.dynamic_filter.clone(),
+            // not copied: the new node gets its own metrics set
+            metrics: ExecutionPlanMetricsSet::new(),
+            limit,
+        }
+    }
+
     pub fn cache(&self) -> &PlanProperties {
         &self.cache
     }