Skip to content

Commit b1ebd68

Browse files
authored
break: remove roaring int/bool & run end bool arrays (#2020)
Roaring arrays must be deserialized on each access, which is slow, and indices are often well-compressed by other methods (e.g., FastLanes delta). For validity, we're moving towards a more specialized approach in #2019 and away from the vanilla compressed-array approach.
1 parent ec1fa8b commit b1ebd68

File tree

34 files changed

+21
-1988
lines changed

34 files changed

+21
-1988
lines changed

Cargo.lock

Lines changed: 0 additions & 55 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,7 @@ vortex-io = { version = "0.21.1", path = "./vortex-io" }
153153
vortex-ipc = { version = "0.21.1", path = "./vortex-ipc" }
154154
vortex-layout = { version = "0.21.1", path = "./vortex-layout" }
155155
vortex-proto = { version = "0.21.1", path = "./vortex-proto" }
156-
vortex-roaring = { version = "0.21.1", path = "./encodings/roaring" }
157156
vortex-runend = { version = "0.21.1", path = "./encodings/runend" }
158-
vortex-runend-bool = { version = "0.21.1", path = "./encodings/runend-bool" }
159157
vortex-scalar = { version = "0.21.1", path = "./vortex-scalar", default-features = false }
160158
vortex-scan = { version = "0.21.1", path = "./vortex-scan", default-features = false }
161159
vortex-sampling-compressor = { version = "0.21.1", path = "./vortex-sampling-compressor" }

bench-vortex/benches/compressor_throughput.rs

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ use rand::distributions::Alphanumeric;
44
use rand::seq::SliceRandom as _;
55
use rand::{thread_rng, Rng, SeedableRng as _};
66
use vortex::aliases::hash_set::HashSet;
7-
use vortex::array::{ConstantArray, VarBinViewArray};
7+
use vortex::array::VarBinViewArray;
88
use vortex::buffer::Buffer;
9-
use vortex::compute::{compare, try_cast, Operator};
9+
use vortex::compute::try_cast;
1010
use vortex::dtype::PType;
1111
use vortex::encodings::dict::dict_encode;
1212
use vortex::encodings::fsst::{fsst_compress, fsst_train_compressor};
@@ -18,14 +18,10 @@ use vortex::sampling_compressor::compressors::bitpacked::{
1818
use vortex::sampling_compressor::compressors::delta::DeltaCompressor;
1919
use vortex::sampling_compressor::compressors::dict::DictCompressor;
2020
use vortex::sampling_compressor::compressors::r#for::FoRCompressor;
21-
use vortex::sampling_compressor::compressors::roaring_bool::RoaringBoolCompressor;
22-
use vortex::sampling_compressor::compressors::roaring_int::RoaringIntCompressor;
2321
use vortex::sampling_compressor::compressors::runend::DEFAULT_RUN_END_COMPRESSOR;
24-
use vortex::sampling_compressor::compressors::runend_bool::RunEndBoolCompressor;
2522
use vortex::sampling_compressor::compressors::zigzag::ZigZagCompressor;
2623
use vortex::sampling_compressor::compressors::CompressorRef;
2724
use vortex::sampling_compressor::SamplingCompressor;
28-
use vortex::scalar::Scalar;
2925
use vortex::{IntoArrayData as _, IntoCanonical, ToArrayData};
3026

3127
#[global_allocator]
@@ -41,16 +37,6 @@ fn primitive(c: &mut Criterion) {
4137
let uint_array =
4238
Buffer::from_iter((0..num_values).map(|_| rng.gen_range(0u32..256))).into_array();
4339
let int_array = try_cast(uint_array.clone(), PType::I32.into()).unwrap();
44-
45-
let bool_array = compare(
46-
&uint_array,
47-
ConstantArray::new(Scalar::from(0u32), uint_array.len()),
48-
Operator::Eq,
49-
)
50-
.unwrap();
51-
52-
let index_array = Buffer::from_iter((0..num_values).map(|i| (i * 2) as u32 + 42)).into_array();
53-
5440
let float_array = try_cast(uint_array.clone(), PType::F32.into()).unwrap();
5541

5642
let compressors_names_and_arrays = [
@@ -63,9 +49,6 @@ fn primitive(c: &mut Criterion) {
6349
(&DEFAULT_RUN_END_COMPRESSOR, "runend", &uint_array),
6450
(&DeltaCompressor, "delta", &uint_array),
6551
(&DictCompressor, "dict", &uint_array),
66-
(&RoaringBoolCompressor, "roaring_bool", &bool_array),
67-
(&RoaringIntCompressor, "roaring_int", &index_array),
68-
(&RunEndBoolCompressor, "runend_bool", &bool_array),
6952
(&FoRCompressor, "frame_of_reference", &int_array),
7053
(&ZigZagCompressor, "zigzag", &int_array),
7154
(&ALPCompressor, "alp", &float_array),

bench-vortex/src/bin/notimplemented.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@ use vortex::encodings::datetime_parts::DateTimePartsArray;
2020
use vortex::encodings::dict::DictArray;
2121
use vortex::encodings::fastlanes::{BitPackedArray, DeltaArray, FoRArray};
2222
use vortex::encodings::fsst::{fsst_compress, fsst_train_compressor};
23-
use vortex::encodings::roaring::{Bitmap, RoaringBoolArray, RoaringIntArray};
2423
use vortex::encodings::runend::RunEndArray;
25-
use vortex::encodings::runend_bool::RunEndBoolArray;
2624
use vortex::encodings::zigzag::ZigZagArray;
2725
use vortex::scalar::Scalar;
2826
use vortex::validity::Validity;
@@ -118,18 +116,9 @@ fn enc_impls() -> Vec<ArrayData> {
118116
.into_array(),
119117
NullArray::new(10).into_array(),
120118
buffer![0, 1].into_array(),
121-
RoaringBoolArray::try_new(Bitmap::from([0u32, 10, 20]), 30)
122-
.unwrap()
123-
.into_array(),
124-
RoaringIntArray::try_new(Bitmap::from([5u32, 6, 8]), PType::U32)
125-
.unwrap()
126-
.into_array(),
127119
RunEndArray::try_new(buffer![5u32, 8].into_array(), buffer![0, 1].into_array())
128120
.unwrap()
129121
.into_array(),
130-
RunEndBoolArray::try_new(buffer![5u32, 8].into_array(), true, Validity::NonNullable)
131-
.unwrap()
132-
.into_array(),
133122
SparseArray::try_new(
134123
buffer![5u64, 8].into_array(),
135124
PrimitiveArray::new(buffer![3u32, 6], Validity::AllValid).into_array(),

encodings/roaring/Cargo.toml

Lines changed: 0 additions & 31 deletions
This file was deleted.
-3 Bytes
Binary file not shown.
-21 Bytes
Binary file not shown.

encodings/roaring/src/boolean/compress.rs

Lines changed: 0 additions & 15 deletions
This file was deleted.

encodings/roaring/src/boolean/compute.rs

Lines changed: 0 additions & 94 deletions
This file was deleted.

0 commit comments

Comments
 (0)