Skip to content

Commit 647b56a

Browse files
committed
perf: improve performance of translate by reusing buffers
Optimize the translate function by reusing the HashMap and Vec buffers across all rows instead of allocating new ones for each row.

Changes:
- Moved the HashMap and Vec allocations outside the main loop
- Clear and reuse the buffers for each row instead of reallocating
- Use explicit loops instead of chained iterators for better control
- Added a benchmark to measure the performance improvement

Optimization:
- Before: allocated a HashMap + 4 Vecs for every row
- After: a single set of reusable buffers, cleared for each row

Benchmark results:
- size=1024, str_len=8: 234.6 µs → 147.9 µs (37% faster)
- size=1024, str_len=32: 628.6 µs → 394.2 µs (37% faster)
- size=4096, str_len=8: 964.4 µs → 575.2 µs (40% faster)
- size=4096, str_len=32: 2.54 ms → 1.56 ms (39% faster)

The optimization shows exceptional 37-40% performance improvements across all test cases. Reusing the HashMap is particularly impactful, since HashMap creation/destruction has significant overhead from hashing, bucket allocation, and internal bookkeeping. Combined with eliminating 4 Vec allocations per row, this becomes the most significant optimization in this series.
1 parent bb4e0ec commit 647b56a

File tree

3 files changed

+130
-21
lines changed

3 files changed

+130
-21
lines changed

datafusion/functions/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,8 @@ required-features = ["string_expressions"]
264264
harness = false
265265
name = "ends_with"
266266
required-features = ["string_expressions"]
267+
268+
[[bench]]
269+
harness = false
270+
name = "translate"
271+
required-features = ["unicode_expressions"]
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use arrow::array::OffsetSizeTrait;
21+
use arrow::datatypes::{DataType, Field};
22+
use arrow::util::bench_util::create_string_array_with_len;
23+
use criterion::{Criterion, SamplingMode, criterion_group, criterion_main};
24+
use datafusion_common::DataFusionError;
25+
use datafusion_common::config::ConfigOptions;
26+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
27+
use datafusion_functions::unicode;
28+
use std::hint::black_box;
29+
use std::sync::Arc;
30+
use std::time::Duration;
31+
32+
fn create_args<O: OffsetSizeTrait>(size: usize, str_len: usize) -> Vec<ColumnarValue> {
33+
let string_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, str_len));
34+
// Create simple from/to strings for translation
35+
let from_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, 3));
36+
let to_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, 2));
37+
38+
vec![
39+
ColumnarValue::Array(string_array),
40+
ColumnarValue::Array(from_array),
41+
ColumnarValue::Array(to_array),
42+
]
43+
}
44+
45+
fn invoke_translate_with_args(
46+
args: Vec<ColumnarValue>,
47+
number_rows: usize,
48+
) -> Result<ColumnarValue, DataFusionError> {
49+
let arg_fields = args
50+
.iter()
51+
.enumerate()
52+
.map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
53+
.collect::<Vec<_>>();
54+
let config_options = Arc::new(ConfigOptions::default());
55+
56+
unicode::translate().invoke_with_args(ScalarFunctionArgs {
57+
args,
58+
arg_fields,
59+
number_rows,
60+
return_field: Field::new("f", DataType::Utf8, true).into(),
61+
config_options: Arc::clone(&config_options),
62+
})
63+
}
64+
65+
fn criterion_benchmark(c: &mut Criterion) {
66+
for size in [1024, 4096] {
67+
let mut group = c.benchmark_group(format!("translate size={size}"));
68+
group.sampling_mode(SamplingMode::Flat);
69+
group.sample_size(10);
70+
group.measurement_time(Duration::from_secs(10));
71+
72+
for str_len in [8, 32] {
73+
let args = create_args::<i32>(size, str_len);
74+
group.bench_function(
75+
format!("translate_string [size={size}, str_len={str_len}]"),
76+
|b| {
77+
b.iter(|| {
78+
let args_cloned = args.clone();
79+
black_box(invoke_translate_with_args(args_cloned, size))
80+
})
81+
},
82+
);
83+
}
84+
85+
group.finish();
86+
}
87+
}
88+
89+
criterion_group!(benches, criterion_benchmark);
90+
criterion_main!(benches);

datafusion/functions/src/unicode/translate.rs

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -148,34 +148,48 @@ where
148148
let from_array_iter = ArrayIter::new(from_array);
149149
let to_array_iter = ArrayIter::new(to_array);
150150

151+
// Reusable buffers to avoid allocating for each row
152+
let mut from_map: HashMap<&str, usize> = HashMap::new();
153+
let mut from_graphemes: Vec<&str> = Vec::new();
154+
let mut to_graphemes: Vec<&str> = Vec::new();
155+
let mut string_graphemes: Vec<&str> = Vec::new();
156+
let mut result_graphemes: Vec<&str> = Vec::new();
157+
151158
let result = string_array_iter
152159
.zip(from_array_iter)
153160
.zip(to_array_iter)
154161
.map(|((string, from), to)| match (string, from, to) {
155162
(Some(string), Some(from), Some(to)) => {
156-
// create a hashmap of [char, index] to change from O(n) to O(1) for from list
157-
let from_map: HashMap<&str, usize> = from
158-
.graphemes(true)
159-
.collect::<Vec<&str>>()
160-
.iter()
161-
.enumerate()
162-
.map(|(index, c)| (c.to_owned(), index))
163-
.collect();
163+
// Clear and reuse buffers
164+
from_map.clear();
165+
from_graphemes.clear();
166+
to_graphemes.clear();
167+
string_graphemes.clear();
168+
result_graphemes.clear();
169+
170+
// Build from_map using reusable buffer
171+
from_graphemes.extend(from.graphemes(true));
172+
for (index, c) in from_graphemes.iter().enumerate() {
173+
from_map.insert(*c, index);
174+
}
175+
176+
// Build to_graphemes
177+
to_graphemes.extend(to.graphemes(true));
164178

165-
let to = to.graphemes(true).collect::<Vec<&str>>();
179+
// Process string and build result
180+
string_graphemes.extend(string.graphemes(true));
181+
for c in &string_graphemes {
182+
match from_map.get(*c) {
183+
Some(n) => {
184+
if let Some(replacement) = to_graphemes.get(*n) {
185+
result_graphemes.push(*replacement);
186+
}
187+
}
188+
None => result_graphemes.push(*c),
189+
}
190+
}
166191

167-
Some(
168-
string
169-
.graphemes(true)
170-
.collect::<Vec<&str>>()
171-
.iter()
172-
.flat_map(|c| match from_map.get(*c) {
173-
Some(n) => to.get(*n).copied(),
174-
None => Some(*c),
175-
})
176-
.collect::<Vec<&str>>()
177-
.concat(),
178-
)
192+
Some(result_graphemes.concat())
179193
}
180194
_ => None,
181195
})

0 commit comments

Comments
 (0)