Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions datafusion/functions-nested/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,7 @@ name = "array_set_ops"
[[bench]]
harness = false
name = "array_position"

# Criterion benchmark target for the `array_resize` scalar function
# (driver lives in benches/array_resize.rs; harness = false lets
# criterion supply its own main).
[[bench]]
harness = false
name = "array_resize"
170 changes: 170 additions & 0 deletions datafusion/functions-nested/benches/array_resize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::array::{ArrayRef, Int64Array, ListArray};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::{DataType, Field};
use criterion::{
BenchmarkGroup, Criterion, criterion_group, criterion_main, measurement::WallTime,
};
use datafusion_common::config::ConfigOptions;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
use datafusion_functions_nested::resize::ArrayResize;
use std::hint::black_box;
use std::sync::Arc;

const NUM_ROWS: usize = 1_000;

/// Registers the `array_resize` benchmark cases with Criterion.
///
/// Every case drives `ArrayResize::invoke_with_args` over a
/// `NUM_ROWS`-row `List<Int64>` column. The cases vary the grow/shrink
/// pattern and the fill argument so that both the uniform-fill fast
/// path and the per-row-fill slow path of the kernel are exercised.
fn criterion_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("array_resize_i64");
    let list_field: Arc<Field> = Field::new_list_field(DataType::Int64, true).into();
    let list_data_type = DataType::List(Arc::clone(&list_field));
    let arg_fields = vec![
        Field::new("array", list_data_type.clone(), true).into(),
        Field::new("size", DataType::Int64, false).into(),
        Field::new("value", DataType::Int64, true).into(),
    ];
    let return_field: Arc<Field> = Field::new("result", list_data_type, true).into();
    let config_options = Arc::new(ConfigOptions::default());
    // Field metadata for the two-argument form (array + size, no
    // explicit fill value); shared by every two-argument case below.
    let two_arg_fields = arg_fields[..2].to_vec();

    // Grow each 10-element list to 500 elements, same fill value everywhere.
    bench_case(
        &mut group,
        "grow_uniform_fill_10_to_500",
        &[
            ColumnarValue::Array(create_int64_list_array(NUM_ROWS, 10)),
            ColumnarValue::Array(repeated_int64_array(500)),
            ColumnarValue::Array(repeated_int64_array(7)),
        ],
        &arg_fields,
        &return_field,
        &config_options,
    );

    // Shrink each 500-element list down to 10; the fill value is unused.
    bench_case(
        &mut group,
        "shrink_uniform_fill_500_to_10",
        &[
            ColumnarValue::Array(create_int64_list_array(NUM_ROWS, 500)),
            ColumnarValue::Array(repeated_int64_array(10)),
            ColumnarValue::Array(repeated_int64_array(7)),
        ],
        &arg_fields,
        &return_field,
        &config_options,
    );

    // Grow without an explicit fill value (defaults to null fill).
    bench_case(
        &mut group,
        "grow_default_null_fill_10_to_500",
        &[
            ColumnarValue::Array(create_int64_list_array(NUM_ROWS, 10)),
            ColumnarValue::Array(repeated_int64_array(500)),
        ],
        &two_arg_fields,
        &return_field,
        &config_options,
    );

    // Grow with a distinct fill value per row (forces the per-row path).
    bench_case(
        &mut group,
        "grow_variable_fill_10_to_500",
        &[
            ColumnarValue::Array(create_int64_list_array(NUM_ROWS, 10)),
            ColumnarValue::Array(repeated_int64_array(500)),
            ColumnarValue::Array(distinct_fill_array()),
        ],
        &arg_fields,
        &return_field,
        &config_options,
    );

    // Alternate growing (100 -> 200) and shrinking (100 -> 10) rows.
    // Use `two_arg_fields` here for consistency with the other
    // two-argument case above (was `&arg_fields[..2]`).
    bench_case(
        &mut group,
        "mixed_grow_shrink_1000x_100",
        &[
            ColumnarValue::Array(create_int64_list_array(NUM_ROWS, 100)),
            ColumnarValue::Array(mixed_size_array()),
        ],
        &two_arg_fields,
        &return_field,
        &config_options,
    );

    group.finish();
}

/// Benchmarks a single `array_resize` invocation configuration.
///
/// The argument vectors and `Arc` handles are cloned on every
/// iteration, mirroring how the engine hands arguments to a scalar
/// UDF; the result is routed through `black_box` so the optimizer
/// cannot discard the work.
fn bench_case(
    group: &mut BenchmarkGroup<'_, WallTime>,
    name: &str,
    args: &[ColumnarValue],
    arg_fields: &[Arc<Field>],
    return_field: &Arc<Field>,
    config_options: &Arc<ConfigOptions>,
) {
    let resize_udf = ArrayResize::new();
    group.bench_function(name, |bencher| {
        bencher.iter(|| {
            let invocation = ScalarFunctionArgs {
                args: args.to_vec(),
                arg_fields: arg_fields.to_vec(),
                number_rows: NUM_ROWS,
                return_field: Arc::clone(return_field),
                config_options: Arc::clone(config_options),
            };
            black_box(resize_udf.invoke_with_args(invocation).unwrap())
        })
    });
}

fn create_int64_list_array(num_rows: usize, list_len: usize) -> ArrayRef {
let values = (0..(num_rows * list_len))
.map(|v| Some(v as i64))
.collect::<Int64Array>();
let offsets = (0..=num_rows)
.map(|i| (i * list_len) as i32)
.collect::<Vec<i32>>();

Arc::new(
ListArray::try_new(
Arc::new(Field::new_list_field(DataType::Int64, true)),
OffsetBuffer::new(offsets.into()),
Arc::new(values),
None,
)
.unwrap(),
)
}

/// Returns a non-null `Int64` array of length `NUM_ROWS` in which every
/// slot holds `value`.
fn repeated_int64_array(value: i64) -> ArrayRef {
    let repeated = std::iter::repeat(Some(value)).take(NUM_ROWS);
    Arc::new(Int64Array::from_iter(repeated))
}

fn distinct_fill_array() -> ArrayRef {
Arc::new(Int64Array::from_iter((0..NUM_ROWS).map(|i| Some(i as i64))))
}

/// Returns an `Int64` size column alternating between 200 (even rows)
/// and 10 (odd rows), so resizing a 100-element list alternately grows
/// and shrinks.
fn mixed_size_array() -> ArrayRef {
    let sizes = (0..NUM_ROWS).map(|row| {
        let target = if row % 2 == 0 { 200_i64 } else { 10_i64 };
        Some(target)
    });
    Arc::new(Int64Array::from_iter(sizes))
}

// Register the benchmark group and generate the criterion `main`.
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
98 changes: 97 additions & 1 deletion datafusion/functions-nested/src/resize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,103 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
let values = array.values();
let original_data = values.to_data();

// create default element array
// Track the largest per-row growth so the uniform-fill fast path can
// materialize one reusable fill buffer of the required size.
let mut max_extra: usize = 0;
for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
if array.is_null(row_index) {
continue;
}
let target_count = count_array.value(row_index).to_usize().ok_or_else(|| {
internal_datafusion_err!("array_resize: failed to convert size to usize")
})?;
let current_len = (offset_window[1] - offset_window[0]).to_usize().unwrap();
if target_count > current_len {
let extra = target_count - current_len;
if extra > max_extra {
max_extra = extra;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if extra > max_extra {
max_extra = extra;
}
max_extra = max_extra.max(extra);

}
}

// The fast path is valid when at least one row grows and every row would
// use the same fill value.
let is_uniform_fill = max_extra > 0
&& match &default_element {
None => true,
Some(fill_array) => {
let len = fill_array.len();
let null_count = fill_array.logical_null_count();

len <= 1
|| null_count == len
|| (null_count == 0 && {
let first = fill_array.slice(0, 1);
(1..len)
.all(|i| fill_array.slice(i, 1).as_ref() == first.as_ref())
})
}
};

// Fast path: at least one row needs to grow and all rows share
// the same fill value.
if is_uniform_fill {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The naming is a bit confusing: is_uniform_fill is false if the fill value is uniform but there are no rows that need to grow. How about use_batch_fill or use_bulk_fill or similar?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think use_bulk_fill is pretty clear, I renamed it to that, thanks

let fill_scalar = match &default_element {
None => ScalarValue::try_from(&data_type)?,
Some(fill_array) if fill_array.logical_null_count() == fill_array.len() => {
ScalarValue::try_from(&data_type)?
}
Some(fill_array) => ScalarValue::try_from_array(fill_array.as_ref(), 0)?,
};
let default_element = fill_scalar.to_array_of_size(max_extra)?;
let default_value_data = default_element.to_data();

let capacity = Capacities::Array(original_data.len() + default_value_data.len());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this right? default_value_data.len() is the per-row growth, I think we want the total estimated output size.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for the reminder. I added capacity calculation in the pre-scan and use for the allocation.

let mut offsets = vec![O::usize_as(0)];
let mut mutable = MutableArrayData::with_capacities(
vec![&original_data, &default_value_data],
false,
capacity,
);

let mut null_builder = NullBufferBuilder::new(array.len());

for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fast path and slow path are doing very similar work and there's a bunch of duplicate code. I wonder if it would be possible to refactor them -- the key difference is just how the fill itself is done, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, the main difference is the fill behavior, and the surrounding control flow is very similar. I’ll take a look to see what can be cleanly refactored there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I refactored it to pull the shared loop out of the fast and slow paths.

if array.is_null(row_index) {
null_builder.append_null();
offsets.push(offsets[row_index]);
continue;
}
null_builder.append_non_null();

let count = count_array.value(row_index).to_usize().ok_or_else(|| {
internal_datafusion_err!("array_resize: failed to convert size to usize")
})?;
let count = O::usize_as(count);
let start = offset_window[0];
if start + count > offset_window[1] {
let extra_count = (start + count - offset_window[1]).to_usize().unwrap();
let end = offset_window[1];
mutable.extend(0, (start).to_usize().unwrap(), (end).to_usize().unwrap());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Minor) Why (start)? Parens unnecessary, here and below.

mutable.extend(1, 0, extra_count);
} else {
let end = start + count;
mutable.extend(0, (start).to_usize().unwrap(), (end).to_usize().unwrap());
};
offsets.push(offsets[row_index] + count);
}

let data = mutable.freeze();

return Ok(Arc::new(GenericListArray::<O>::try_new(
Arc::clone(field),
OffsetBuffer::<O>::new(offsets.into()),
arrow::array::make_array(data),
null_builder.finish(),
)?));
}

// Slow path: each row may have a different fill value.
let default_element = if let Some(default_element) = default_element {
default_element
} else {
Expand Down