Skip to content

Commit 545c37f

Browse files
authored
perf: optimise right for byte access and StringView (#20069)
## Which issue does this PR close? - Closes #20068. ## Rationale for this change Similar to issue #19749 and the optimisation of `left` in #19980, it's worth doing the same for `right` ## What changes are included in this PR? - Improve efficiency of the function by making fewer memory allocations and going directly to bytes, based on char boundaries - Provide a specialisation for StringView with buffer zero-copy - Use `arrow_array::buffer::make_view` for low-level view manipulation (we still need to know about a magic constant 12 for a buffer layout) - Benchmark - up to 90% performance improvement ``` right size=1024/string_array positive n/1024 time: [24.286 µs 24.658 µs 25.087 µs] change: [−86.881% −86.662% −86.424%] (p = 0.00 < 0.05) Performance has improved. right size=1024/string_array negative n/1024 time: [29.996 µs 30.737 µs 31.511 µs] change: [−89.442% −89.229% −89.003%] (p = 0.00 < 0.05) Performance has improved. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild right size=4096/string_array positive n/4096 time: [105.58 µs 109.39 µs 113.51 µs] change: [−86.119% −85.788% −85.497%] (p = 0.00 < 0.05) Performance has improved. Found 9 outliers among 100 measurements (9.00%) 6 (6.00%) high mild 3 (3.00%) high severe right size=4096/string_array negative n/4096 time: [136.48 µs 138.34 µs 140.36 µs] change: [−88.007% −87.848% −87.692%] (p = 0.00 < 0.05) Performance has improved. Found 4 outliers among 100 measurements (4.00%) 4 (4.00%) high mild right size=1024/string_view_array positive n/1024 time: [25.054 µs 25.500 µs 26.033 µs] change: [−82.569% −82.285% −81.891%] (p = 0.00 < 0.05) Performance has improved. right size=1024/string_view_array negative n/1024 time: [41.281 µs 42.730 µs 44.432 µs] change: [−73.832% −73.288% −72.716%] (p = 0.00 < 0.05) Performance has improved. Found 5 outliers among 100 measurements (5.00%) 3 (3.00%) high mild 2 (2.00%) high severe right size=4096/string_view_array positive n/4096 time: [129.38 µs 133.69 µs 137.61 µs] change: [−79.497% −78.998% −78.581%] (p = 0.00 < 0.05) Performance has improved. Found 4 outliers among 100 measurements (4.00%) 4 (4.00%) high mild right size=4096/string_view_array negative n/4096 time: [218.16 µs 229.41 µs 243.30 µs] change: [−65.405% −63.622% −61.515%] (p = 0.00 < 0.05) Performance has improved. Found 10 outliers among 100 measurements (10.00%) 3 (3.00%) high mild 7 (7.00%) high severe ``` ## Are these changes tested? - Existing unit tests for `right` - Added more unit tests - Added bench similar to `right.rs` - Existing SLTs pass ## Are there any user-facing changes? No
1 parent 1a0c2e0 commit 545c37f

File tree

4 files changed

+354
-48
lines changed

4 files changed

+354
-48
lines changed

datafusion/functions/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,11 @@ harness = false
311311
name = "left"
312312
required-features = ["unicode_expressions"]
313313

314+
[[bench]]
315+
harness = false
316+
name = "right"
317+
required-features = ["unicode_expressions"]
318+
314319
[[bench]]
315320
harness = false
316321
name = "factorial"
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use std::hint::black_box;
21+
use std::sync::Arc;
22+
23+
use arrow::array::{ArrayRef, Int64Array};
24+
use arrow::datatypes::{DataType, Field};
25+
use arrow::util::bench_util::{
26+
create_string_array_with_len, create_string_view_array_with_len,
27+
};
28+
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
29+
use datafusion_common::config::ConfigOptions;
30+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
31+
use datafusion_functions::unicode::right;
32+
33+
fn create_args(
34+
size: usize,
35+
str_len: usize,
36+
use_negative: bool,
37+
is_string_view: bool,
38+
) -> Vec<ColumnarValue> {
39+
let string_arg = if is_string_view {
40+
ColumnarValue::Array(Arc::new(create_string_view_array_with_len(
41+
size, 0.1, str_len, true,
42+
)))
43+
} else {
44+
ColumnarValue::Array(Arc::new(create_string_array_with_len::<i32>(
45+
size, 0.1, str_len,
46+
)))
47+
};
48+
49+
// For negative n, we want to trigger the double-iteration code path
50+
let n_values: Vec<i64> = if use_negative {
51+
(0..size).map(|i| -((i % 10 + 1) as i64)).collect()
52+
} else {
53+
(0..size).map(|i| (i % 10 + 1) as i64).collect()
54+
};
55+
let n_array = Arc::new(Int64Array::from(n_values));
56+
57+
vec![
58+
string_arg,
59+
ColumnarValue::Array(Arc::clone(&n_array) as ArrayRef),
60+
]
61+
}
62+
63+
fn criterion_benchmark(c: &mut Criterion) {
64+
for is_string_view in [false, true] {
65+
for size in [1024, 4096] {
66+
let mut group = c.benchmark_group(format!("right size={size}"));
67+
68+
// Benchmark with positive n (no optimization needed)
69+
let mut function_name = if is_string_view {
70+
"string_view_array positive n"
71+
} else {
72+
"string_array positive n"
73+
};
74+
let args = create_args(size, 32, false, is_string_view);
75+
group.bench_function(BenchmarkId::new(function_name, size), |b| {
76+
let arg_fields = args
77+
.iter()
78+
.enumerate()
79+
.map(|(idx, arg)| {
80+
Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
81+
})
82+
.collect::<Vec<_>>();
83+
let return_type = if is_string_view {
84+
DataType::Utf8View
85+
} else {
86+
DataType::Utf8
87+
};
88+
let config_options = Arc::new(ConfigOptions::default());
89+
90+
b.iter(|| {
91+
black_box(
92+
right()
93+
.invoke_with_args(ScalarFunctionArgs {
94+
args: args.clone(),
95+
arg_fields: arg_fields.clone(),
96+
number_rows: size,
97+
return_field: Field::new("f", return_type.clone(), true)
98+
.into(),
99+
config_options: Arc::clone(&config_options),
100+
})
101+
.expect("right should work"),
102+
)
103+
})
104+
});
105+
106+
// Benchmark with negative n (triggers optimization)
107+
function_name = if is_string_view {
108+
"string_view_array negative n"
109+
} else {
110+
"string_array negative n"
111+
};
112+
let args = create_args(size, 32, true, is_string_view);
113+
group.bench_function(BenchmarkId::new(function_name, size), |b| {
114+
let arg_fields = args
115+
.iter()
116+
.enumerate()
117+
.map(|(idx, arg)| {
118+
Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
119+
})
120+
.collect::<Vec<_>>();
121+
let return_type = if is_string_view {
122+
DataType::Utf8View
123+
} else {
124+
DataType::Utf8
125+
};
126+
let config_options = Arc::new(ConfigOptions::default());
127+
128+
b.iter(|| {
129+
black_box(
130+
right()
131+
.invoke_with_args(ScalarFunctionArgs {
132+
args: args.clone(),
133+
arg_fields: arg_fields.clone(),
134+
number_rows: size,
135+
return_field: Field::new("f", return_type.clone(), true)
136+
.into(),
137+
config_options: Arc::clone(&config_options),
138+
})
139+
.expect("right should work"),
140+
)
141+
})
142+
});
143+
144+
group.finish();
145+
}
146+
}
147+
}
148+
149+
criterion_group!(benches, criterion_benchmark);
150+
criterion_main!(benches);

datafusion/functions/src/unicode/left.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ fn left_byte_length(string: &str, n: i64) -> usize {
225225
Ordering::Equal => 0,
226226
Ordering::Greater => string
227227
.char_indices()
228-
.nth(n as usize)
228+
.nth(n.unsigned_abs() as usize)
229229
.map(|(index, _)| index)
230230
.unwrap_or(string.len()),
231231
}

0 commit comments

Comments
 (0)