Skip to content

Commit bc6b2cd

Browse files
andygrove and comphead authored
chore: Add criterion benchmarks for casting between integer types (#401)
* Add cargo bench for casting between int types
* Update core/benches/cast_from_string.rs

Co-authored-by: comphead <[email protected]>

---------

Co-authored-by: comphead <[email protected]>
1 parent 1403380 commit bc6b2cd

File tree

3 files changed

+104
-15
lines changed

3 files changed

+104
-15
lines changed

core/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,5 +119,9 @@ name = "row_columnar"
119119
harness = false
120120

121121
[[bench]]
122-
name = "cast"
122+
name = "cast_from_string"
123+
harness = false
124+
125+
[[bench]]
126+
name = "cast_numeric"
123127
harness = false
core/benches/cast_from_string.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,7 @@ use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
2323
use std::sync::Arc;
2424

2525
fn criterion_benchmark(c: &mut Criterion) {
26-
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
27-
let mut b = StringBuilder::new();
28-
for i in 0..1000 {
29-
if i % 10 == 0 {
30-
b.append_null();
31-
} else if i % 2 == 0 {
32-
b.append_value(format!("{}", rand::random::<f64>()));
33-
} else {
34-
b.append_value(format!("{}", rand::random::<i64>()));
35-
}
36-
}
37-
let array = b.finish();
38-
let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
26+
let batch = create_utf8_batch();
3927
let expr = Arc::new(Column::new("a", 0));
4028
let timezone = "".to_string();
4129
let cast_string_to_i8 = Cast::new(
@@ -58,7 +46,7 @@ fn criterion_benchmark(c: &mut Criterion) {
5846
);
5947
let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
6048

61-
let mut group = c.benchmark_group("cast");
49+
let mut group = c.benchmark_group("cast_string_to_int");
6250
group.bench_function("cast_string_to_i8", |b| {
6351
b.iter(|| cast_string_to_i8.evaluate(&batch).unwrap());
6452
});
@@ -73,6 +61,24 @@ fn criterion_benchmark(c: &mut Criterion) {
7361
});
7462
}
7563

64+
// Create UTF8 batch with strings representing ints, floats, nulls
65+
fn create_utf8_batch() -> RecordBatch {
66+
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
67+
let mut b = StringBuilder::new();
68+
for i in 0..1000 {
69+
if i % 10 == 0 {
70+
b.append_null();
71+
} else if i % 2 == 0 {
72+
b.append_value(format!("{}", rand::random::<f64>()));
73+
} else {
74+
b.append_value(format!("{}", rand::random::<i64>()));
75+
}
76+
}
77+
let array = b.finish();
78+
let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
79+
batch
80+
}
81+
7682
fn config() -> Criterion {
7783
Criterion::default()
7884
}

core/benches/cast_numeric.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_array::{builder::Int32Builder, RecordBatch};
19+
use arrow_schema::{DataType, Field, Schema};
20+
use comet::execution::datafusion::expressions::cast::{Cast, EvalMode};
21+
use criterion::{criterion_group, criterion_main, Criterion};
22+
use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
23+
use std::sync::Arc;
24+
25+
/// Benchmarks casting a nullable `Int32` column to `Int8`, `Int16` and `Int64`.
///
/// All casts use `EvalMode::Legacy` with an empty timezone string and read
/// column "a" (index 0) of the batch produced by `create_int32_batch`. The
/// three measurements are reported under the `cast_int_to_int` group.
fn criterion_benchmark(c: &mut Criterion) {
    let batch = create_int32_batch();
    let expr = Arc::new(Column::new("a", 0));
    let timezone = "".to_string();
    let cast_i32_to_i8 = Cast::new(
        expr.clone(),
        DataType::Int8,
        EvalMode::Legacy,
        timezone.clone(),
    );
    let cast_i32_to_i16 = Cast::new(
        expr.clone(),
        DataType::Int16,
        EvalMode::Legacy,
        timezone.clone(),
    );
    let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);

    let mut group = c.benchmark_group("cast_int_to_int");
    group.bench_function("cast_i32_to_i8", |b| {
        b.iter(|| cast_i32_to_i8.evaluate(&batch).unwrap());
    });
    group.bench_function("cast_i32_to_i16", |b| {
        b.iter(|| cast_i32_to_i16.evaluate(&batch).unwrap());
    });
    group.bench_function("cast_i32_to_i64", |b| {
        b.iter(|| cast_i32_to_i64.evaluate(&batch).unwrap());
    });
    // Finish the group explicitly rather than relying on its Drop impl.
    group.finish();
}
54+
55+
fn create_int32_batch() -> RecordBatch {
56+
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
57+
let mut b = Int32Builder::new();
58+
for i in 0..1000 {
59+
if i % 10 == 0 {
60+
b.append_null();
61+
} else {
62+
b.append_value(rand::random::<i32>());
63+
}
64+
}
65+
let array = b.finish();
66+
let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
67+
batch
68+
}
69+
70+
fn config() -> Criterion {
71+
Criterion::default()
72+
}
73+
74+
criterion_group! {
75+
name = benches;
76+
config = config();
77+
targets = criterion_benchmark
78+
}
79+
criterion_main!(benches);

0 commit comments

Comments
 (0)