Skip to content

Commit 9c98ea4

Browse files
committed
clean up final
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 93bffa7 commit 9c98ea4

File tree

6 files changed

+89
-40
lines changed

6 files changed

+89
-40
lines changed

vortex-btrblocks/src/schemes/float.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,6 @@ impl rle::RLEConfig for FloatRLEConfig {
7474
fn generate_stats(array: &ArrayRef) -> FloatStats {
7575
FloatStats::generate(&array.to_primitive())
7676
}
77-
78-
fn compress_values(
79-
compressor: &CascadingCompressor,
80-
values: &PrimitiveArray,
81-
ctx: CompressorContext,
82-
) -> VortexResult<ArrayRef> {
83-
compressor.compress_canonical(Canonical::Primitive(values.clone()), ctx)
84-
}
8577
}
8678

8779
impl RLEStats for FloatStats {

vortex-btrblocks/src/schemes/integer.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,6 @@ impl rle::RLEConfig for IntRLEConfig {
9898
fn generate_stats(array: &ArrayRef) -> IntegerStats {
9999
IntegerStats::generate(&array.to_primitive())
100100
}
101-
102-
fn compress_values(
103-
compressor: &CascadingCompressor,
104-
values: &PrimitiveArray,
105-
ctx: CompressorContext,
106-
) -> VortexResult<ArrayRef> {
107-
compressor.compress_canonical(Canonical::Primitive(values.clone()), ctx)
108-
}
109101
}
110102

111103
impl RLEStats for IntegerStats {

vortex-btrblocks/src/schemes/rle.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ pub trait RLEStats {
3838
fn source(&self) -> &PrimitiveArray;
3939
}
4040

41-
// TODO(connor): This trait is super confusing, we should probably just remove it and hardcode the
42-
// only 2 implementations (integer and float).
4341
/// Configuration trait for RLE schemes.
4442
///
4543
/// Implement this trait to define the behavior of an RLE scheme for a specific
@@ -56,13 +54,6 @@ pub trait RLEConfig: Debug + Send + Sync + 'static {
5654

5755
/// Generates statistics for the given array.
5856
fn generate_stats(array: &ArrayRef) -> Self::Stats;
59-
60-
/// Compress the values array after RLE encoding.
61-
fn compress_values(
62-
compressor: &CascadingCompressor,
63-
values: &PrimitiveArray,
64-
ctx: CompressorContext,
65-
) -> VortexResult<ArrayRef>;
6657
}
6758

6859
/// RLE scheme that is generic over a configuration type.

vortex-compressor/public-api.lock

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -402,14 +402,6 @@ pub fn vortex_compressor::ctx::CompressorContext::fmt(&self, f: &mut core::fmt::
402402

403403
pub const vortex_compressor::ctx::MAX_CASCADE: usize
404404

405-
pub mod vortex_compressor::root_list_children
406-
407-
pub const vortex_compressor::root_list_children::ELEMENTS: usize
408-
409-
pub const vortex_compressor::root_list_children::OFFSETS: usize
410-
411-
pub const vortex_compressor::root_list_children::SIZES: usize
412-
413405
pub mod vortex_compressor::scheme
414406

415407
pub enum vortex_compressor::scheme::ChildSelection
@@ -1040,8 +1032,6 @@ impl vortex_compressor::CascadingCompressor
10401032

10411033
pub fn vortex_compressor::CascadingCompressor::compress(&self, array: &vortex_array::array::ArrayRef) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>
10421034

1043-
pub fn vortex_compressor::CascadingCompressor::compress_canonical(&self, array: vortex_array::canonical::Canonical, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>
1044-
10451035
pub fn vortex_compressor::CascadingCompressor::compress_child(&self, child: &vortex_array::array::ArrayRef, parent_ctx: &vortex_compressor::ctx::CompressorContext, parent_id: vortex_compressor::scheme::SchemeId, child_index: usize) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>
10461036

10471037
pub fn vortex_compressor::CascadingCompressor::execution_ctx(&self) -> parking_lot::mutex::MutexGuard<'_, vortex_array::executor::ExecutionCtx>

vortex-compressor/src/compressor.rs

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,7 @@ const ROOT_SCHEME_ID: SchemeId = SchemeId {
4747
};
4848

4949
/// Child indices for the compressor's list/listview compression.
50-
pub mod root_list_children {
51-
/// List elements child.
52-
pub const ELEMENTS: usize = 0;
50+
mod root_list_children {
5351
/// List/ListView offsets child.
5452
pub const OFFSETS: usize = 1;
5553
/// ListView sizes child.
@@ -166,7 +164,7 @@ impl CascadingCompressor {
166164
/// # Errors
167165
///
168166
/// Returns an error if compression of any sub-array fails.
169-
pub fn compress_canonical(
167+
fn compress_canonical(
170168
&self,
171169
array: Canonical,
172170
ctx: CompressorContext,
@@ -326,12 +324,23 @@ impl CascadingCompressor {
326324
let mut best: Option<(&'static dyn Scheme, f64)> = None;
327325

328326
for &scheme in schemes {
327+
// Constant detection on a sample is a false positive: the sample being constant
328+
// does not mean the full array is constant.
329+
if ctx.is_sample() && scheme.detects_constant() {
330+
continue;
331+
}
332+
329333
let ratio = scheme.expected_compression_ratio(self, data, ctx.clone())?;
330334

331335
tracing::debug!(scheme = %scheme.id(), ratio, "evaluated compression ratio");
332336

333337
if is_better_ratio(ratio, &best) {
334338
best = Some((scheme, ratio));
339+
340+
// Schemes that return f64::MAX (like Constant) cannot be beat, so stop early.
341+
if ratio == f64::MAX {
342+
break;
343+
}
335344
}
336345
}
337346

@@ -450,3 +459,79 @@ impl CascadingCompressor {
450459
fn is_better_ratio(ratio: f64, best: &Option<(&'static dyn Scheme, f64)>) -> bool {
451460
ratio.is_finite() && !ratio.is_subnormal() && ratio > 1.0 && best.is_none_or(|(_, r)| ratio > r)
452461
}
462+
463+
#[cfg(test)]
464+
mod tests {
465+
use super::*;
466+
use crate::builtins::FloatDictScheme;
467+
use crate::builtins::IntDictScheme;
468+
use crate::builtins::IntUncompressedScheme;
469+
use crate::builtins::StringDictScheme;
470+
use crate::ctx::CompressorContext;
471+
use crate::scheme::SchemeExt;
472+
473+
fn compressor() -> CascadingCompressor {
474+
CascadingCompressor::new(vec![
475+
&IntUncompressedScheme,
476+
&IntDictScheme,
477+
&FloatDictScheme,
478+
&StringDictScheme,
479+
])
480+
}
481+
482+
#[test]
483+
fn test_self_exclusion() {
484+
let c = compressor();
485+
let ctx = CompressorContext::default().descend_with_scheme(IntDictScheme.id(), 0);
486+
487+
// IntDictScheme is in the history, so it should be excluded.
488+
assert!(c.is_excluded(&IntDictScheme, &ctx));
489+
// IntUncompressedScheme is not in the history.
490+
assert!(!c.is_excluded(&IntUncompressedScheme, &ctx));
491+
}
492+
493+
#[test]
494+
fn test_root_exclusion_list_offsets() {
495+
let c = compressor();
496+
let ctx = CompressorContext::default()
497+
.descend_with_scheme(ROOT_SCHEME_ID, root_list_children::OFFSETS);
498+
499+
// IntDict should be excluded for list offsets.
500+
assert!(c.is_excluded(&IntDictScheme, &ctx));
501+
// IntUncompressed should not be excluded.
502+
assert!(!c.is_excluded(&IntUncompressedScheme, &ctx));
503+
}
504+
505+
#[test]
506+
fn test_push_rule_float_dict_excludes_int_dict_from_codes() {
507+
let c = compressor();
508+
// FloatDict cascading through codes (child 1).
509+
let ctx = CompressorContext::default().descend_with_scheme(FloatDictScheme.id(), 1);
510+
511+
// IntDict should be excluded from FloatDict's codes child.
512+
assert!(c.is_excluded(&IntDictScheme, &ctx));
513+
// IntUncompressed should not be excluded.
514+
assert!(!c.is_excluded(&IntUncompressedScheme, &ctx));
515+
}
516+
517+
#[test]
518+
fn test_push_rule_float_dict_excludes_int_dict_from_values() {
519+
let c = compressor();
520+
// FloatDict cascading through values (child 0).
521+
let ctx = CompressorContext::default().descend_with_scheme(FloatDictScheme.id(), 0);
522+
523+
// IntDict should also be excluded from FloatDict's values child (ALP propagation
524+
// replacement).
525+
assert!(c.is_excluded(&IntDictScheme, &ctx));
526+
}
527+
528+
#[test]
529+
fn test_no_exclusion_without_history() {
530+
let c = compressor();
531+
let ctx = CompressorContext::default();
532+
533+
// No history means no exclusions.
534+
assert!(!c.is_excluded(&IntDictScheme, &ctx));
535+
assert!(!c.is_excluded(&IntUncompressedScheme, &ctx));
536+
}
537+
}

vortex-compressor/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,3 @@ mod sample;
2525

2626
mod compressor;
2727
pub use compressor::CascadingCompressor;
28-
pub use compressor::root_list_children;

0 commit comments

Comments
 (0)