Skip to content

Commit 2ad1e87

Browse files
committed
fix imports, move fsst+dict bench into fsst
Signed-off-by: Onur Satici <[email protected]>
1 parent c28c4b1 commit 2ad1e87

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+231
-252
lines changed

Cargo.lock

Lines changed: 0 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ vortex-compute = { version = "0.1.0", path = "./vortex-compute", default-feature
226226
vortex-datafusion = { version = "0.1.0", path = "./vortex-datafusion", default-features = false }
227227
vortex-datetime-parts = { version = "0.1.0", path = "./encodings/datetime-parts", default-features = false }
228228
vortex-decimal-byte-parts = { version = "0.1.0", path = "encodings/decimal-byte-parts", default-features = false }
229-
vortex-dict = { version = "0.1.0", path = "./encodings/dict", default-features = false }
230229
vortex-dtype = { version = "0.1.0", path = "./vortex-dtype", default-features = false }
231230
vortex-error = { version = "0.1.0", path = "./vortex-error", default-features = false }
232231
vortex-fastlanes = { version = "0.1.0", path = "./encodings/fastlanes", default-features = false }

encodings/fsst/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ workspace = true
2020
async-trait = { workspace = true }
2121
fsst-rs = { workspace = true }
2222
prost = { workspace = true }
23+
rand = { workspace = true, optional = true }
2324
vortex-array = { workspace = true }
2425
vortex-buffer = { workspace = true }
2526
vortex-dtype = { workspace = true }
@@ -28,6 +29,9 @@ vortex-mask = { workspace = true }
2829
vortex-scalar = { workspace = true }
2930
vortex-vector = { workspace = true }
3031

32+
[features]
33+
test-harness = ["dep:rand", "vortex-array/test-harness"]
34+
3135
[dev-dependencies]
3236
divan = { workspace = true }
3337
itertools = { workspace = true }
@@ -38,3 +42,8 @@ vortex-array = { workspace = true, features = ["test-harness"] }
3842
[[bench]]
3943
name = "fsst_compress"
4044
harness = false
45+
46+
[[bench]]
47+
name = "chunked_dict_array_builder"
48+
harness = false
49+
required-features = ["test-harness"]

vortex-array/benches/chunked_dict_array_builder.rs renamed to encodings/fsst/benches/chunked_dict_array_builder.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
use divan::Bencher;
55
use rand::distr::{Distribution, StandardUniform};
66
use vortex_array::arrays::ChunkedArray;
7+
use vortex_array::arrays::dict_test::gen_dict_primitive_chunks;
78
use vortex_array::builders::builder_with_capacity;
89
use vortex_array::compute::warm_up_vtables;
910
use vortex_array::{Array, ArrayRef, IntoArray};
10-
use vortex_dict::test::{gen_dict_fsst_test_data, gen_dict_primitive_chunks};
1111
use vortex_dtype::NativePType;
12+
use vortex_fsst::test_utils::gen_dict_fsst_test_data;
1213

1314
fn main() {
1415
warm_up_vtables();

encodings/fsst/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ mod compress;
1717
mod compute;
1818
mod ops;
1919
mod serde;
20+
#[cfg(feature = "test-harness")]
21+
pub mod test_utils;
2022
#[cfg(test)]
2123
mod tests;
2224

encodings/fsst/src/test_utils.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#![allow(clippy::unwrap_used)]
5+
6+
use rand::prelude::StdRng;
7+
use rand::{Rng, SeedableRng};
8+
use vortex_array::arrays::{DictArray, PrimitiveArray, VarBinArray};
9+
use vortex_array::{ArrayRef, IntoArray};
10+
use vortex_dtype::{DType, NativePType, Nullability};
11+
use vortex_error::VortexUnwrap;
12+
13+
use crate::{fsst_compress, fsst_train_compressor};
14+
15+
/// Generates `len` random byte strings and returns them as an FSST-compressed array.
///
/// Each string's length is `avg_str_len` scaled by a random percentage in `50..=150`
/// (integer arithmetic), and each byte is drawn from the half-open range
/// `b'a'..(b'a' + unique_chars)`. The strings are collected into a non-nullable
/// `DType::Binary` `VarBinArray`, which is passed to `fsst_train_compressor` and then
/// `fsst_compress` with the resulting compressor.
///
/// The RNG is seeded with the constant `0`, so the same arguments draw the same
/// random sequence.
///
/// # Panics
///
/// - If `unique_chars == 0` and a string of non-zero length is generated: the byte
///   range is empty and `random_range` panics on an empty range.
/// - If `unique_chars > 158`: `b'a' + unique_chars` overflows `u8` (panics when overflow
///   checks are enabled, wraps otherwise).
/// - Presumably if training or compression fails, via `vortex_unwrap` -- confirm against
///   `VortexUnwrap`.
pub fn gen_fsst_test_data(len: usize, avg_str_len: usize, unique_chars: u8) -> ArrayRef {
16+
// Fixed seed keeps the generated data reproducible between calls.
let mut rng = StdRng::seed_from_u64(0);
17+
let mut strings = Vec::with_capacity(len);
18+
19+
for _ in 0..len {
20+
// Generate a random string with length around `avg_str_len`. The number of possible
21+
// characters within the random string is defined by `unique_chars`.
22+
// Note: this `len` shadows the outer `len` (string count) with the per-string length.
let len = avg_str_len * rng.random_range(50..=150) / 100;
23+
strings.push(Some(
24+
(0..len)
25+
.map(|_| rng.random_range(b'a'..(b'a' + unique_chars)))
26+
.collect::<Vec<u8>>(),
27+
));
28+
}
29+
30+
let varbin = VarBinArray::from_iter(
31+
strings
32+
.into_iter()
33+
.map(|opt_s| opt_s.map(Vec::into_boxed_slice)),
34+
DType::Binary(Nullability::NonNullable),
35+
);
36+
// The compressor is trained on the same data it is then used to compress.
let compressor = fsst_train_compressor(varbin.as_ref()).vortex_unwrap();
37+
38+
fsst_compress(varbin.as_ref(), &compressor)
39+
.vortex_unwrap()
40+
.into_array()
41+
}
42+
43+
/// Builds a `DictArray` whose values come from [`gen_fsst_test_data`] and whose codes
/// are random integers of type `T`.
///
/// - `len`: number of codes generated; also passed to `gen_fsst_test_data` as the number
///   of value strings.
/// - `unique_values`: exclusive upper bound for each code; codes are drawn from
///   `0..unique_values`.
/// - `str_len`, `unique_char_count`: forwarded to `gen_fsst_test_data` as its
///   `avg_str_len` and `unique_chars` arguments.
///
/// The code RNG is seeded with the constant `0`, so the same arguments draw the same
/// code sequence.
///
/// NOTE(review): the values array is generated with `len` entries rather than
/// `unique_values` entries, so codes only ever reference the first `unique_values`
/// values, and if `unique_values > len` a code may exceed the values length. Confirm
/// whether this is intended.
///
/// # Panics
///
/// - If `unique_values == 0` and `len > 0`: `random_range` panics on an empty range.
/// - If a drawn code cannot be converted to `T` (the `unwrap` on `T::from`).
/// - Under the same conditions as `gen_fsst_test_data`.
/// - Presumably if `DictArray::try_new` rejects the inputs, via `vortex_unwrap` --
///   confirm against `VortexUnwrap`.
pub fn gen_dict_fsst_test_data<T: NativePType>(
44+
len: usize,
45+
unique_values: usize,
46+
str_len: usize,
47+
unique_char_count: u8,
48+
) -> DictArray {
49+
let values = gen_fsst_test_data(len, str_len, unique_char_count);
50+
// Fresh RNG with the same fixed seed as the one used inside `gen_fsst_test_data`.
let mut rng = StdRng::seed_from_u64(0);
51+
let codes = (0..len)
52+
.map(|_| T::from(rng.random_range(0..unique_values)).unwrap())
53+
.collect::<PrimitiveArray>();
54+
DictArray::try_new(codes.into_array(), values).vortex_unwrap()
55+
}

vortex-array/benches/dict_compare.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
use std::str::from_utf8;
77

88
use vortex_array::accessor::ArrayAccessor;
9+
use vortex_array::arrays::dict_test::{gen_primitive_for_dict, gen_varbin_words};
910
use vortex_array::arrays::{ConstantArray, VarBinArray, VarBinViewArray};
11+
use vortex_array::builders::dict::dict_encode;
1012
use vortex_array::compute::{Operator, compare, warm_up_vtables};
11-
use vortex_dict::builders::dict_encode;
12-
use vortex_dict::test::{gen_primitive_for_dict, gen_varbin_words};
1313

1414
fn main() {
1515
warm_up_vtables();

vortex-array/benches/dict_compress.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55

66
use divan::Bencher;
77
use rand::distr::{Distribution, StandardUniform};
8+
use vortex_array::arrays::dict_test::{gen_primitive_for_dict, gen_varbin_words};
89
use vortex_array::arrays::{VarBinArray, VarBinViewArray};
10+
use vortex_array::builders::dict::dict_encode;
911
use vortex_array::compute::warm_up_vtables;
10-
use vortex_dict::builders::dict_encode;
11-
use vortex_dict::test::{gen_primitive_for_dict, gen_varbin_words};
1212
use vortex_dtype::NativePType;
1313

1414
fn main() {

vortex-array/benches/dict_mask.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@ use divan::Bencher;
77
use rand::rngs::StdRng;
88
use rand::{Rng, SeedableRng};
99
use vortex_array::IntoArray;
10-
use vortex_array::arrays::PrimitiveArray;
10+
use vortex_array::arrays::{DictArray, PrimitiveArray};
1111
use vortex_array::compute::{mask, warm_up_vtables};
12-
use vortex_dict::DictArray;
1312
use vortex_mask::Mask;
1413

1514
fn main() {

vortex-array/src/arrays/dict/array.rs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
use std::fmt::Debug;
55
use std::hash::Hash;
66

7-
use vortex_array::stats::{ArrayStats, StatsSetRef};
8-
use vortex_array::vtable::{ArrayVTable, NotSupported, VTable, ValidityVTable};
9-
use vortex_array::{
10-
Array, ArrayEq, ArrayHash, ArrayRef, EncodingId, EncodingRef, Precision, ToCanonical, vtable,
11-
};
127
use vortex_buffer::BitBuffer;
138
use vortex_dtype::{DType, match_each_integer_ptype};
149
use vortex_error::{VortexExpect as _, VortexResult, vortex_bail};
1510
use vortex_mask::{AllOr, Mask};
1611

12+
use crate::stats::{ArrayStats, StatsSetRef};
13+
use crate::vtable::{ArrayVTable, NotSupported, VTable, ValidityVTable};
14+
use crate::{
15+
Array, ArrayEq, ArrayHash, ArrayRef, EncodingId, EncodingRef, Precision, ToCanonical, vtable,
16+
};
17+
1718
vtable!(Dict);
1819

1920
impl VTable for DictVTable {
@@ -194,17 +195,17 @@ mod test {
194195
use rand::distr::{Distribution, StandardUniform};
195196
use rand::prelude::StdRng;
196197
use rand::{Rng, SeedableRng};
197-
use vortex_array::arrays::{ChunkedArray, PrimitiveArray};
198-
use vortex_array::builders::builder_with_capacity;
199-
use vortex_array::validity::Validity;
200-
use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical, assert_arrays_eq};
201198
use vortex_buffer::{BitBuffer, buffer};
202199
use vortex_dtype::Nullability::NonNullable;
203200
use vortex_dtype::{DType, NativePType, PType, UnsignedPType};
204201
use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic};
205202
use vortex_mask::AllOr;
206203

207-
use crate::DictArray;
204+
use crate::arrays::dict::DictArray;
205+
use crate::arrays::{ChunkedArray, PrimitiveArray};
206+
use crate::builders::builder_with_capacity;
207+
use crate::validity::Validity;
208+
use crate::{Array, ArrayRef, IntoArray, ToCanonical, assert_arrays_eq};
208209

209210
#[test]
210211
fn nullable_codes_validity() {

0 commit comments

Comments
 (0)