Skip to content

Commit 815a4e2

Browse files
authored
Handwritten pipeline benchmark and analysis (#5408)
See the analysis in the file itself for more details. Here are the numbers I got on my AMD 7950X CPU:

```
Timer precision: 10 ns
pipeline                     fastest   │ slowest   │ median    │ mean      │ samples │ iters
├─ correctness_verification            │           │           │           │         │
│  ╰─ verify_all_methods               │           │           │           │         │
│     ├─ 1024                21.67 µs  │ 79.2 µs   │ 28.41 µs  │ 30.07 µs  │ 3145    │ 3145
│     ╰─ 16384               2.105 ms  │ 7.156 ms  │ 2.4 ms    │ 3.142 ms  │ 100     │ 100
╰─ decompress_benchmarks               │           │           │           │         │
   ├─ batch                            │           │           │           │         │
   │  ├─ 1024                218.5 ns  │ 3.677 µs  │ 223.5 ns  │ 225.4 ns  │ 460458  │ 3683664
   │  ├─ 8192                1.809 µs  │ 29.41 µs  │ 1.869 µs  │ 1.88 µs   │ 445049  │ 445049
   │  ├─ 16384               3.589 µs  │ 42.26 µs  │ 3.669 µs  │ 3.699 µs  │ 245287  │ 245287
   │  ├─ 65536               14.89 µs  │ 41.38 µs  │ 15.16 µs  │ 15.4 µs   │ 62931   │ 62931
   │  ├─ 73728               16.92 µs  │ 47.76 µs  │ 18.15 µs  │ 18.18 µs  │ 53463   │ 53463
   │  ├─ 86016               20.22 µs  │ 95.48 µs  │ 20.66 µs  │ 20.95 µs  │ 46441   │ 46441
   │  ├─ 100352              23.92 µs  │ 141.6 µs  │ 24.46 µs  │ 24.86 µs  │ 39312   │ 39312
   │  ╰─ 262144              64.36 µs  │ 377.9 µs  │ 66.16 µs  │ 67.01 µs  │ 14794   │ 14794
   ├─ in_place_batch                   │           │           │           │         │
   │  ├─ 1024                212.3 ns  │ 5.053 µs  │ 217.3 ns  │ 218.9 ns  │ 470820  │ 3766560
   │  ├─ 8192                1.739 µs  │ 30.48 µs  │ 1.799 µs  │ 1.809 µs  │ 458869  │ 458869
   │  ├─ 16384               3.459 µs  │ 38.94 µs  │ 3.529 µs  │ 3.542 µs  │ 255618  │ 255618
   │  ├─ 65536               14.27 µs  │ 53.86 µs  │ 14.63 µs  │ 14.76 µs  │ 66042   │ 66042
   │  ├─ 73728               15.52 µs  │ 45.09 µs  │ 15.94 µs  │ 16.02 µs  │ 60967   │ 60967
   │  ├─ 86016               17.98 µs  │ 80.87 µs  │ 18.44 µs  │ 18.54 µs  │ 52746   │ 52746
   │  ├─ 100352              20.85 µs  │ 99.88 µs  │ 21.46 µs  │ 21.57 µs  │ 45497   │ 45497
   │  ╰─ 262144              58.74 µs  │ 114.2 µs  │ 59.04 µs  │ 59.82 µs  │ 16552   │ 16552
   ├─ in_place_pipeline                │           │           │           │         │
   │  ├─ 1024                207.3 ns  │ 9.616 µs  │ 213.5 ns  │ 214.3 ns  │ 478786  │ 3830288
   │  ├─ 8192                1.649 µs  │ 245.5 µs  │ 1.709 µs  │ 1.719 µs  │ 479444  │ 479444
   │  ├─ 16384               3.289 µs  │ 39.84 µs  │ 3.359 µs  │ 3.371 µs  │ 267173  │ 267173
   │  ├─ 65536               13.01 µs  │ 45.35 µs  │ 13.31 µs  │ 13.41 µs  │ 72524   │ 72524
   │  ├─ 73728               14.7 µs   │ 50.16 µs  │ 14.99 µs  │ 15.06 µs  │ 64793   │ 64793
   │  ├─ 86016               17.01 µs  │ 43.34 µs  │ 17.48 µs  │ 17.59 µs  │ 55669   │ 55669
   │  ├─ 100352              20.02 µs  │ 48.37 µs  │ 20.4 µs   │ 20.57 µs  │ 47724   │ 47724
   │  ╰─ 262144              52.46 µs  │ 86.05 µs  │ 53.27 µs  │ 53.6 µs   │ 18523   │ 18523
   ├─ pipeline                         │           │           │           │         │
   │  ├─ 1024                208.5 ns  │ 6.634 µs  │ 213.5 ns  │ 215.5 ns  │ 477313  │ 3818504
   │  ├─ 8192                1.659 µs  │ 24.7 µs   │ 1.699 µs  │ 1.707 µs  │ 482728  │ 482728
   │  ├─ 16384               3.289 µs  │ 32.9 µs   │ 3.369 µs  │ 3.393 µs  │ 265702  │ 265702
   │  ├─ 65536               13.1 µs   │ 33.87 µs  │ 13.31 µs  │ 13.4 µs   │ 72563   │ 72563
   │  ├─ 73728               15.09 µs  │ 48.79 µs  │ 15.36 µs  │ 15.49 µs  │ 63019   │ 63019
   │  ├─ 86016               17.28 µs  │ 44.51 µs  │ 17.47 µs  │ 17.62 µs  │ 55584   │ 55584
   │  ├─ 100352              20.24 µs  │ 44.49 µs  │ 20.62 µs  │ 20.78 µs  │ 47268   │ 47268
   │  ╰─ 262144              54.16 µs  │ 89.19 µs  │ 55.6 µs   │ 56.01 µs  │ 17730   │ 17730
   ╰─ pipeline_extra_copy              │           │           │           │         │
      ├─ 1024                213.5 ns  │ 3.454 µs  │ 232.3 ns  │ 233.4 ns  │ 446407  │ 3571256
      ├─ 8192                1.649 µs  │ 32.93 µs  │ 1.779 µs  │ 1.798 µs  │ 461071  │ 461071
      ├─ 16384               3.279 µs  │ 19.76 µs  │ 3.359 µs  │ 3.379 µs  │ 266614  │ 266614
      ├─ 65536               12.97 µs  │ 55.99 µs  │ 13.27 µs  │ 13.39 µs  │ 72613   │ 72613
      ├─ 73728               14.65 µs  │ 52.98 µs  │ 14.94 µs  │ 15.07 µs  │ 64736   │ 64736
      ├─ 86016               17.15 µs  │ 40.08 µs  │ 17.43 µs  │ 17.55 µs  │ 55777   │ 55777
      ├─ 100352              19.99 µs  │ 59.83 µs  │ 20.32 µs  │ 20.45 µs  │ 48023   │ 48023
      ╰─ 262144              52.21 µs  │ 72.9 µs   │ 53.02 µs  │ 53.33 µs  │ 18614   │ 18614
```

---------

Signed-off-by: Connor Tsui <[email protected]>
1 parent 71f0f00 commit 815a4e2

File tree

5 files changed

+974
-2
lines changed

5 files changed

+974
-2
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/alp/src/alp/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ pub trait ALPFloat: private::Sealed + Float + Display + NativePType {
225225
encoded.map_each_in_place(move |encoded| Self::decode_single(encoded, exponents))
226226
}
227227

228+
fn decode_into(encoded: &[Self::ALPInt], exponents: Exponents, output: &mut [Self]) {
229+
assert_eq!(encoded.len(), output.len());
230+
231+
for i in 0..encoded.len() {
232+
output[i] = Self::decode_single(encoded[i], exponents)
233+
}
234+
}
235+
228236
fn decode_slice_inplace(encoded: &mut [Self::ALPInt], exponents: Exponents) {
229237
let decoded: &mut [Self] = unsafe { transmute(encoded) };
230238
decoded.iter_mut().for_each(|v| {

vortex-array/src/arrays/dict/array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::hash::Hash;
66

77
use vortex_buffer::{BitBuffer, ByteBuffer};
88
use vortex_dtype::{DType, Nullability, PType, match_each_integer_ptype};
9-
use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_ensure, vortex_err};
9+
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure, vortex_err};
1010
use vortex_mask::{AllOr, Mask};
1111

1212
use crate::builders::dict::dict_encode;

vortex/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ all-features = true
2020
workspace = true
2121

2222
[dependencies]
23+
fastlanes = { workspace = true }
24+
rand = { workspace = true }
2325
vortex-alp = { workspace = true }
2426
vortex-array = { workspace = true }
2527
vortex-btrblocks = { workspace = true }
@@ -57,7 +59,6 @@ divan = { workspace = true }
5759
itertools = { workspace = true }
5860
mimalloc = { workspace = true }
5961
parquet = { workspace = true }
60-
rand = { workspace = true }
6162
serde_json = { workspace = true }
6263
tokio = { workspace = true, features = ["full"] }
6364
tracing = { workspace = true }
@@ -92,3 +93,7 @@ test = false
9293
name = "common_encoding_tree_throughput"
9394
harness = false
9495
test = false
96+
97+
[[bench]]
98+
name = "pipeline"
99+
harness = false

0 commit comments

Comments
 (0)