Skip to content

Commit 4560a87

Browse files
authored
Some fixes to BtrBlocks compressor (#2500)
* Downscale patch indices (TODO: are there patches other than those from bitpacking?)
* TODO: Why does BtrBlocks incorrectly choose ALP over dict?
1 parent 114368c commit 4560a87

File tree

13 files changed

+77
-30
lines changed

13 files changed

+77
-30
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/quickstart/python.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Use :func:`~vortex.compress` to compress the Vortex array and check the relative
3737

3838
>>> cvtx = vx.compress(vtx)
3939
>>> cvtx.nbytes
40-
14415
40+
15166
4141
>>> cvtx.nbytes / vtx.nbytes
4242
0.10...
4343

encodings/alp/src/alp_rd/array.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ impl ALPRDArray {
117117
&self.right_parts
118118
}
119119

120+
#[inline]
121+
pub fn right_bit_width(&self) -> u8 {
122+
self.right_bit_width
123+
}
124+
120125
/// Patches of left-most bits.
121126
pub fn left_parts_patches(&self) -> Option<&Patches> {
122127
self.left_parts_patches.as_ref()
@@ -128,9 +133,8 @@ impl ALPRDArray {
128133
&self.left_parts_dictionary
129134
}
130135

131-
#[inline]
132-
pub(crate) fn right_bit_width(&self) -> u8 {
133-
self.right_bit_width
136+
pub fn replace_left_parts_patches(&mut self, patches: Option<Patches>) {
137+
self.left_parts_patches = patches;
134138
}
135139
}
136140

encodings/fastlanes/src/bitpacking/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,10 @@ impl BitPackedArray {
181181
self.patches.as_ref()
182182
}
183183

184+
pub fn replace_patches(&mut self, patches: Option<Patches>) {
185+
self.patches = patches;
186+
}
187+
184188
#[inline]
185189
pub fn offset(&self) -> u16 {
186190
self.offset

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ packages = ["dummy"] # Required for workspace project
1515

1616
[tool.uv]
1717
managed = true
18-
required-version = ">=0.5.0"
18+
required-version = ">=0.6.0"
1919
# Currently, all dev dependencies live in the root since uv doesn't have transitive dev dependencies.
2020
# See: https://github.com/astral-sh/uv/issues/7541
2121
dev-dependencies = [

pyvortex/src/compress.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use pyo3::prelude::*;
2-
use vortex::sampling_compressor::SamplingCompressor;
2+
use vortex::compressor::BtrBlocksCompressor;
33

44
use crate::arrays::PyArray;
55
use crate::install_module;
@@ -47,9 +47,6 @@ pub(crate) fn init(py: Python, parent: &Bound<PyModule>) -> PyResult<()> {
4747
/// 'vortex.alp(0x11)(f64?, len=1000)'
4848
#[pyfunction]
4949
pub fn compress<'py>(array: &'py Bound<'py, PyArray>) -> PyResult<Bound<'py, PyArray>> {
50-
let compressor = SamplingCompressor::default();
51-
let inner = compressor
52-
.compress(array.borrow().as_ref(), None)?
53-
.into_array();
54-
PyArray::init(array.py(), inner)
50+
let compressed = BtrBlocksCompressor.compress(array.borrow().as_ref())?;
51+
PyArray::init(array.py(), compressed)
5552
}

uv.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-btrblocks/src/float.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use vortex_runend::compress::runend_encode;
1414
use self::stats::FloatStats;
1515
use crate::float::dictionary::dictionary_encode;
1616
use crate::integer::{IntCompressor, IntegerStats};
17+
use crate::patches::compress_patches;
1718
use crate::{
1819
Compressor, CompressorStats, GenerateStatsOptions, Scheme,
1920
estimate_compression_ratio_with_sampling, integer,
@@ -184,14 +185,6 @@ impl Scheme for ALPScheme {
184185
return Ok(0.0);
185186
}
186187

187-
// If Dict/RLE is feasible, we want to do that before ALP, and then only ALP encode
188-
// the values.
189-
if stats.average_run_length >= RUN_END_THRESHOLD
190-
|| stats.distinct_values_count < stats.value_count / 2
191-
{
192-
return Ok(0.0);
193-
}
194-
195188
estimate_compression_ratio_with_sampling(
196189
self,
197190
stats,
@@ -225,10 +218,9 @@ impl Scheme for ALPScheme {
225218
let compressed_alp_ints =
226219
IntCompressor::compress(&alp_ints, is_sample, allowed_cascading - 1, &int_excludes)?;
227220

228-
Ok(
229-
ALPArray::try_new(compressed_alp_ints, alp.exponents(), alp.patches().cloned())?
230-
.into_array(),
231-
)
221+
let patches = alp.patches().map(compress_patches).transpose()?;
222+
223+
Ok(ALPArray::try_new(compressed_alp_ints, alp.exponents(), patches)?.into_array())
232224
}
233225
}
234226

@@ -273,7 +265,15 @@ impl Scheme for ALPRDScheme {
273265
ptype => vortex_panic!("cannot ALPRD compress ptype {ptype}"),
274266
};
275267

276-
Ok(encoder.encode(stats.source()).into_array())
268+
let mut alp_rd = encoder.encode(stats.source());
269+
270+
let patches = alp_rd
271+
.left_parts_patches()
272+
.map(compress_patches)
273+
.transpose()?;
274+
alp_rd.replace_left_parts_patches(patches);
275+
276+
Ok(alp_rd.into_array())
277277
}
278278
}
279279

vortex-btrblocks/src/integer.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use vortex_zigzag::{ZigZagArray, zigzag_encode};
2626

2727
use crate::downscale::downscale_integer_array;
2828
use crate::integer::dictionary::dictionary_encode;
29+
use crate::patches::compress_patches;
2930
use crate::{
3031
Compressor, CompressorStats, GenerateStatsOptions, Scheme,
3132
estimate_compression_ratio_with_sampling,
@@ -406,7 +407,11 @@ impl Scheme for BitPackingScheme {
406407
if bw as usize == stats.source().ptype().bit_width() {
407408
return Ok(stats.source().clone().into_array());
408409
}
409-
let packed = bitpack_encode(stats.source(), bw)?;
410+
let mut packed = bitpack_encode(stats.source(), bw)?;
411+
412+
let patches = packed.patches().map(compress_patches).transpose()?;
413+
packed.replace_patches(patches);
414+
410415
Ok(packed.into_array())
411416
}
412417
}

vortex-btrblocks/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ pub use crate::temporal::compress_temporal;
1717
mod downscale;
1818
mod float;
1919
pub mod integer;
20+
mod patches;
2021
mod sample;
2122
mod string;
2223
mod temporal;

0 commit comments

Comments
 (0)