perf: improve performance of translate by reusing buffers (#19533)

viirya · web-flow · commit 7c50448f5a64 · 2025-12-29T18:03:30.000Z
## Which issue does this PR close?  - Closes #. ## Rationale for this change  ## What changes are included in this PR?  Optimize translate function by reusing HashMap and Vec buffers across all rows instead of allocating new ones for each row. Changes: - Moved HashMap and Vec allocations outside the main loop - Clear and reuse buffers for each row instead of reallocating - Use explicit loops instead of chained iterators for better control - Added benchmark to measure performance improvements Optimization: - Before: Allocated HashMap + 4 Vecs for every row - After: Single set of reusable buffers cleared for each row Benchmark Results: - size=1024, str_len=8: 234.6 µs → 147.9 µs (37% faster) - size=1024, str_len=32: 628.6 µs → 394.2 µs (37% faster) - size=4096, str_len=8: 964.4 µs → 575.2 µs (40% faster) - size=4096, str_len=32: 2.54 ms → 1.56 ms (39% faster) The optimization shows exceptional 37-40% performance improvements across all test cases. The HashMap reuse is particularly impactful since HashMap creation/destruction has significant overhead with hashing, bucket allocation, and internal bookkeeping. Combined with eliminating 4 Vec allocations per row, this becomes the most significant optimization in this series. ## Are these changes tested?  ## Are there any user-facing changes?
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
@@ -270,6 +270,11 @@ harness = false
 name = "ends_with"
 required-features = ["string_expressions"]
 
+[[bench]]
+harness = false
+name = "translate"
+required-features = ["unicode_expressions"]
+
 [[bench]]
 harness = false
 name = "levenshtein"
diff --git a/datafusion/functions/benches/translate.rs b/datafusion/functions/benches/translate.rs
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::array::OffsetSizeTrait;
+use arrow::datatypes::{DataType, Field};
+use arrow::util::bench_util::create_string_array_with_len;
+use criterion::{Criterion, SamplingMode, criterion_group, criterion_main};
+use datafusion_common::DataFusionError;
+use datafusion_common::config::ConfigOptions;
+use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
+use datafusion_functions::unicode;
+use std::hint::black_box;
+use std::sync::Arc;
+use std::time::Duration;
+
+/// Builds the three array arguments (`string`, `from`, `to`) used to call
+/// `translate` in this benchmark.
+///
+/// Every array has `size` rows and is produced by
+/// `create_string_array_with_len::<O>`, with `0.1` as its second argument
+/// (presumably the null density -- confirm against arrow's `bench_util`).
+/// The length argument is `str_len` for the input strings, 3 for the `from`
+/// strings and 2 for the `to` strings.
+///
+/// The arrays are returned wrapped in `ColumnarValue::Array`, in the order
+/// input string, `from`, `to`.
+fn create_args<O: OffsetSizeTrait>(size: usize, str_len: usize) -> Vec<ColumnarValue> {
+    let string_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, str_len));
+    // Create simple from/to strings for translation
+    let from_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, 3));
+    let to_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, 2));
+
+    vec![
+        ColumnarValue::Array(string_array),
+        ColumnarValue::Array(from_array),
+        ColumnarValue::Array(to_array),
+    ]
+}
+
+/// Invokes the `translate` scalar function once on `args`.
+///
+/// For each argument a nullable `Field` named `arg_<index>` is created from
+/// the argument's data type. The call also receives a default
+/// `ConfigOptions`, a nullable `Utf8` return field named `f`, and
+/// `number_rows` passed through unchanged. The result of `invoke_with_args`
+/// is returned as is.
+///
+/// Note: the field list and the `ConfigOptions` are rebuilt on every call, so
+/// a caller that times this function also times that setup.
+fn invoke_translate_with_args(
+    args: Vec<ColumnarValue>,
+    number_rows: usize,
+) -> Result<ColumnarValue, DataFusionError> {
+    // One field description per argument, derived from the argument itself.
+    let arg_fields = args
+        .iter()
+        .enumerate()
+        .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
+        .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
+
+    unicode::translate().invoke_with_args(ScalarFunctionArgs {
+        args,
+        arg_fields,
+        number_rows,
+        return_field: Field::new("f", DataType::Utf8, true).into(),
+        config_options: Arc::clone(&config_options),
+    })
+}
+
+/// Registers the `translate` benchmarks with Criterion.
+///
+/// One benchmark group is created per row count (1024 and 4096). Inside each
+/// group, one benchmark is registered per string-length argument (8 and 32),
+/// using arrays built by `create_args::<i32>`.
+fn criterion_benchmark(c: &mut Criterion) {
+    for size in [1024, 4096] {
+        let mut group = c.benchmark_group(format!("translate size={size}"));
+        // Group-wide measurement settings: flat sampling, 10 samples,
+        // 10 seconds of measurement time.
+        group.sampling_mode(SamplingMode::Flat);
+        group.sample_size(10);
+        group.measurement_time(Duration::from_secs(10));
+
+        for str_len in [8, 32] {
+            // Input arrays are built once per benchmark, outside the timed closure.
+            let args = create_args::<i32>(size, str_len);
+            group.bench_function(
+                format!("translate_string [size={size}, str_len={str_len}]"),
+                |b| {
+                    b.iter(|| {
+                        // `invoke_translate_with_args` takes the argument vector by
+                        // value, so every iteration works on its own clone. The arrays
+                        // are held behind `Arc`, so this presumably only bumps
+                        // reference counts -- confirm against `ColumnarValue`.
+                        let args_cloned = args.clone();
+                        black_box(invoke_translate_with_args(args_cloned, size))
+                    })
+                },
+            );
+        }
+
+        group.finish();
+    }
+}
+
+// Register `criterion_benchmark` under the `benches` group and hand that group
+// to `criterion_main!`; the bench target is declared with `harness = false`,
+// so the entry point is expected to come from Criterion's macro.
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs
@@ -148,34 +148,48 @@ where
     let from_array_iter = ArrayIter::new(from_array);
     let to_array_iter = ArrayIter::new(to_array);
 
+    // Reusable buffers to avoid allocating for each row
+    let mut from_map: HashMap<&str, usize> = HashMap::new();
+    let mut from_graphemes: Vec<&str> = Vec::new();
+    let mut to_graphemes: Vec<&str> = Vec::new();
+    let mut string_graphemes: Vec<&str> = Vec::new();
+    let mut result_graphemes: Vec<&str> = Vec::new();
+
     let result = string_array_iter
         .zip(from_array_iter)
         .zip(to_array_iter)
         .map(|((string, from), to)| match (string, from, to) {
             (Some(string), Some(from), Some(to)) => {
-                // create a hashmap of [char, index] to change from O(n) to O(1) for from list
-                let from_map: HashMap<&str, usize> = from
-                    .graphemes(true)
-                    .collect::<Vec<&str>>()
-                    .iter()
-                    .enumerate()
-                    .map(|(index, c)| (c.to_owned(), index))
-                    .collect();
+                // Clear and reuse buffers
+                from_map.clear();
+                from_graphemes.clear();
+                to_graphemes.clear();
+                string_graphemes.clear();
+                result_graphemes.clear();
+
+                // Build from_map using reusable buffer
+                from_graphemes.extend(from.graphemes(true));
+                for (index, c) in from_graphemes.iter().enumerate() {
+                    from_map.insert(*c, index);
+                }
+
+                // Build to_graphemes
+                to_graphemes.extend(to.graphemes(true));
 
-                let to = to.graphemes(true).collect::<Vec<&str>>();
+                // Process string and build result
+                string_graphemes.extend(string.graphemes(true));
+                for c in &string_graphemes {
+                    match from_map.get(*c) {
+                        Some(n) => {
+                            if let Some(replacement) = to_graphemes.get(*n) {
+                                result_graphemes.push(*replacement);
+                            }
+                        }
+                        None => result_graphemes.push(*c),
+                    }
+                }
 
-                Some(
-                    string
-                        .graphemes(true)
-                        .collect::<Vec<&str>>()
-                        .iter()
-                        .flat_map(|c| match from_map.get(*c) {
-                            Some(n) => to.get(*n).copied(),
-                            None => Some(*c),
-                        })
-                        .collect::<Vec<&str>>()
-                        .concat(),
-                )
+                Some(result_graphemes.concat())
             }
             _ => None,
         })