Skip to content

Commit 0b30df9

Browse files
committed
add more rules and clean up
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 3612521 commit 0b30df9

File tree

4 files changed

+188
-160
lines changed

4 files changed

+188
-160
lines changed

vortex-btrblocks/src/builder.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use crate::SchemeId;
1313
use crate::schemes::decimal;
1414
use crate::schemes::float;
1515
use crate::schemes::integer;
16+
use crate::schemes::rle;
1617
use crate::schemes::string;
1718
use crate::schemes::temporal;
1819

@@ -26,13 +27,14 @@ pub const ALL_SCHEMES: &[&dyn Scheme] = &[
2627
&integer::IntConstantScheme,
2728
// NOTE: FoR must precede BitPacking to avoid unnecessary patches.
2829
&integer::FoRScheme,
29-
&integer::BitPackingScheme,
30+
// NOTE: ZigZag should precede BitPacking because we don't want to bit-pack negative numbers.
3031
&integer::ZigZagScheme,
32+
&integer::BitPackingScheme,
3133
&integer::SparseScheme,
3234
&integer::IntDictScheme,
3335
&integer::RunEndScheme,
3436
&integer::SequenceScheme,
35-
&integer::RLE_INTEGER_SCHEME,
37+
&rle::RLE_INTEGER_SCHEME,
3638
#[cfg(feature = "pco")]
3739
&integer::PcoScheme,
3840
// Float schemes.
@@ -42,7 +44,7 @@ pub const ALL_SCHEMES: &[&dyn Scheme] = &[
4244
&float::ALPRDScheme,
4345
&float::FloatDictScheme,
4446
&float::NullDominatedSparseScheme,
45-
&float::RLE_FLOAT_SCHEME,
47+
&rle::RLE_FLOAT_SCHEME,
4648
#[cfg(feature = "pco")]
4749
&float::PcoScheme,
4850
// Decimal schemes.

vortex-btrblocks/src/schemes/float.rs

Lines changed: 14 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,13 @@
33

44
//! Float compression schemes.
55
6-
use vortex_alp::ALP;
76
use vortex_alp::ALPArray;
87
use vortex_alp::RDEncoder;
98
use vortex_alp::alp_encode;
109
use vortex_array::ArrayRef;
1110
use vortex_array::Canonical;
1211
use vortex_array::IntoArray;
1312
use vortex_array::ToCanonical;
14-
use vortex_array::arrays::PrimitiveArray;
1513
use vortex_array::dtype::PType;
1614
use vortex_compressor::scheme::ChildSelection;
1715
use vortex_compressor::scheme::DescendantExclusion;
@@ -28,15 +26,12 @@ use crate::Scheme;
2826
use crate::SchemeExt;
2927
use crate::compress_patches;
3028
use crate::estimate_compression_ratio_with_sampling;
31-
use crate::schemes::rle;
32-
use crate::schemes::rle::RLEScheme;
33-
use crate::schemes::rle::RLEStats;
3429

3530
/// ALP (Adaptive Lossless floating-Point) encoding.
3631
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
3732
pub struct ALPScheme;
3833

39-
/// ALPRD (ALP with Right Division) encoding variant.
34+
/// ALPRD (ALP for Real Doubles) encoding variant.
4035
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4136
pub struct ALPRDScheme;
4237

@@ -51,47 +46,14 @@ pub struct NullDominatedSparseScheme;
5146
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5247
pub struct PcoScheme;
5348

54-
/// Configuration for float RLE compression.
55-
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
56-
pub struct FloatRLEConfig;
57-
5849
// Re-export builtin schemes from vortex-compressor.
5950
pub use vortex_compressor::builtins::FloatConstantScheme;
6051
pub use vortex_compressor::builtins::FloatDictScheme;
6152
pub use vortex_compressor::builtins::FloatUncompressedScheme;
6253
pub use vortex_compressor::builtins::is_float_primitive;
6354
pub use vortex_compressor::stats::FloatStats;
6455

65-
impl rle::RLEConfig for FloatRLEConfig {
66-
type Stats = FloatStats;
67-
68-
const SCHEME_NAME: &'static str = "vortex.float.rle";
69-
70-
fn matches(canonical: &Canonical) -> bool {
71-
is_float_primitive(canonical)
72-
}
73-
74-
fn generate_stats(array: &ArrayRef) -> FloatStats {
75-
FloatStats::generate(&array.to_primitive())
76-
}
77-
}
78-
79-
impl RLEStats for FloatStats {
80-
fn value_count(&self) -> u32 {
81-
FloatStats::value_count(self)
82-
}
83-
84-
fn average_run_length(&self) -> u32 {
85-
FloatStats::average_run_length(self)
86-
}
87-
88-
fn source(&self) -> &PrimitiveArray {
89-
FloatStats::source(self)
90-
}
91-
}
92-
93-
/// RLE scheme for float compression.
94-
pub const RLE_FLOAT_SCHEME: RLEScheme<FloatRLEConfig> = RLEScheme::new();
56+
pub use crate::schemes::rle::RLE_FLOAT_SCHEME;
9557

9658
impl Scheme for ALPScheme {
9759
fn scheme_name(&self) -> &'static str {
@@ -113,16 +75,14 @@ impl Scheme for ALPScheme {
11375
data: &mut ArrayAndStats,
11476
ctx: CompressorContext,
11577
) -> VortexResult<f64> {
116-
// ALP encodes floats as integers. Without integer compression afterward, the
117-
// encoded ints are the same size.
78+
// ALP encodes floats as integers. Without integer compression afterward, the encoded ints
79+
// are the same size.
11880
if ctx.finished_cascading() {
11981
return Ok(0.0);
12082
}
12183

122-
let stats = data.float_stats();
123-
12484
// We don't support ALP for f16.
125-
if stats.source().ptype() == PType::F16 {
85+
if data.float_stats().source().ptype() == PType::F16 {
12686
return Ok(0.0);
12787
}
12888

@@ -138,19 +98,16 @@ impl Scheme for ALPScheme {
13898
let stats = data.float_stats();
13999

140100
let alp_encoded = alp_encode(&stats.source().to_primitive(), None)?;
141-
let alp = alp_encoded.as_::<ALP>();
142-
let alp_ints = alp.encoded().to_primitive();
143101

144102
// Compress the ALP ints.
145-
146103
let compressed_alp_ints =
147-
compressor.compress_child(&alp_ints.into_array(), &ctx, self.id(), 0)?;
104+
compressor.compress_child(alp_encoded.encoded(), &ctx, self.id(), 0)?;
148105

149106
// Patches are not compressed. They should be infrequent, and if they are not then we want
150107
// to keep them linear for easy indexing.
151-
let patches = alp.patches().map(compress_patches).transpose()?;
108+
let patches = alp_encoded.patches().map(compress_patches).transpose()?;
152109

153-
Ok(ALPArray::new(compressed_alp_ints, alp.exponents(), patches).into_array())
110+
Ok(ALPArray::new(compressed_alp_ints, alp_encoded.exponents(), patches).into_array())
154111
}
155112
}
156113

@@ -169,9 +126,7 @@ impl Scheme for ALPRDScheme {
169126
data: &mut ArrayAndStats,
170127
ctx: CompressorContext,
171128
) -> VortexResult<f64> {
172-
let stats = data.float_stats();
173-
174-
if stats.source().ptype() == PType::F16 {
129+
if data.float_stats().source().ptype() == PType::F16 {
175130
return Ok(0.0);
176131
}
177132

@@ -235,11 +190,11 @@ impl Scheme for NullDominatedSparseScheme {
235190
let stats = data.float_stats();
236191

237192
if stats.value_count() == 0 {
238-
// All nulls should use ConstantScheme.
193+
// All nulls should use ConstantScheme instead of this.
239194
return Ok(0.0);
240195
}
241196

242-
// If the majority is null, will compress well.
197+
// If more than 90% of the values are null, this will compress well.
243198
if stats.null_count() as f64 / stats.source().len() as f64 > 0.9 {
244199
return Ok(stats.source().len() as f64 / stats.value_count() as f64);
245200
}
@@ -261,12 +216,8 @@ impl Scheme for NullDominatedSparseScheme {
261216

262217
if let Some(sparse) = sparse_encoded.as_opt::<Sparse>() {
263218
let indices = sparse.patches().indices().to_primitive().narrow()?;
264-
let compressed_indices = compressor.compress_child(
265-
&indices.to_primitive().into_array(),
266-
&ctx,
267-
self.id(),
268-
0,
269-
)?;
219+
let compressed_indices =
220+
compressor.compress_child(&indices.into_array(), &ctx, self.id(), 0)?;
270221

271222
SparseArray::try_new(
272223
compressed_indices,
@@ -326,8 +277,8 @@ mod tests {
326277
use vortex_error::VortexResult;
327278
use vortex_fastlanes::RLE;
328279

329-
use super::RLE_FLOAT_SCHEME;
330280
use crate::BtrBlocksCompressor;
281+
use crate::schemes::rle::RLE_FLOAT_SCHEME;
331282

332283
#[test]
333284
fn test_empty() -> VortexResult<()> {

0 commit comments

Comments
 (0)