Skip to content

Commit b6ce6fe

Browse files
committed
Add left function benchmark
1 parent b818f93 commit b6ce6fe

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

datafusion/functions/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,3 +294,8 @@ required-features = ["unicode_expressions"]
294294
harness = false
295295
name = "levenshtein"
296296
required-features = ["unicode_expressions"]
297+
298+
[[bench]]
299+
harness = false
300+
name = "left"
301+
required-features = ["unicode_expressions"]
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use std::hint::black_box;
21+
use std::sync::Arc;
22+
23+
use arrow::array::{ArrayRef, Int64Array};
24+
use arrow::datatypes::{DataType, Field};
25+
use arrow::util::bench_util::create_string_array_with_len;
26+
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
27+
use datafusion_common::config::ConfigOptions;
28+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
29+
use datafusion_functions::unicode::left;
30+
31+
fn create_args(size: usize, str_len: usize, use_negative: bool) -> Vec<ColumnarValue> {
32+
let string_array = Arc::new(create_string_array_with_len::<i32>(size, 0.1, str_len));
33+
34+
// For negative n, we want to trigger the double-iteration code path
35+
let n_values: Vec<i64> = if use_negative {
36+
(0..size).map(|i| -((i % 10 + 1) as i64)).collect()
37+
} else {
38+
(0..size).map(|i| (i % 10 + 1) as i64).collect()
39+
};
40+
let n_array = Arc::new(Int64Array::from(n_values));
41+
42+
vec![
43+
ColumnarValue::Array(string_array),
44+
ColumnarValue::Array(Arc::clone(&n_array) as ArrayRef),
45+
]
46+
}
47+
48+
fn criterion_benchmark(c: &mut Criterion) {
49+
for size in [1024, 4096] {
50+
let mut group = c.benchmark_group(format!("left size={size}"));
51+
52+
// Benchmark with positive n (no optimization needed)
53+
let args = create_args(size, 32, false);
54+
group.bench_function(BenchmarkId::new("positive n", size), |b| {
55+
let arg_fields = args
56+
.iter()
57+
.enumerate()
58+
.map(|(idx, arg)| {
59+
Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
60+
})
61+
.collect::<Vec<_>>();
62+
let config_options = Arc::new(ConfigOptions::default());
63+
64+
b.iter(|| {
65+
black_box(
66+
left()
67+
.invoke_with_args(ScalarFunctionArgs {
68+
args: args.clone(),
69+
arg_fields: arg_fields.clone(),
70+
number_rows: size,
71+
return_field: Field::new("f", DataType::Utf8, true).into(),
72+
config_options: Arc::clone(&config_options),
73+
})
74+
.expect("left should work"),
75+
)
76+
})
77+
});
78+
79+
// Benchmark with negative n (triggers optimization)
80+
let args = create_args(size, 32, true);
81+
group.bench_function(BenchmarkId::new("negative n", size), |b| {
82+
let arg_fields = args
83+
.iter()
84+
.enumerate()
85+
.map(|(idx, arg)| {
86+
Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
87+
})
88+
.collect::<Vec<_>>();
89+
let config_options = Arc::new(ConfigOptions::default());
90+
91+
b.iter(|| {
92+
black_box(
93+
left()
94+
.invoke_with_args(ScalarFunctionArgs {
95+
args: args.clone(),
96+
arg_fields: arg_fields.clone(),
97+
number_rows: size,
98+
return_field: Field::new("f", DataType::Utf8, true).into(),
99+
config_options: Arc::clone(&config_options),
100+
})
101+
.expect("left should work"),
102+
)
103+
})
104+
});
105+
106+
group.finish();
107+
}
108+
}
109+
110+
criterion_group!(benches, criterion_benchmark);
111+
criterion_main!(benches);

0 commit comments

Comments
 (0)