Skip to content

Commit b5aab36

Browse files
committed
u
Signed-off-by: Joe Isaacs <[email protected]>
1 parent ae0840a commit b5aab36

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

vortex-btrblocks/src/string.rs

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -189,11 +189,26 @@ impl Scheme for VarBinScheme {
189189
_allowed_cascading: usize,
190190
_excludes: &[StringCode],
191191
) -> VortexResult<f64> {
192-
// VarBinScheme doesn't provide compression - it's a format conversion.
193-
// VarBinView is generally more efficient than VarBin for most workloads
194-
// (especially with small strings that can be inlined).
195-
// Return 0.0 to indicate this scheme should not be selected by the compressor.
196-
Ok(0.0)
192+
if stats.src.is_empty() {
193+
return Ok(1.0);
194+
}
195+
196+
let src = stats.source();
197+
198+
// Calculate VarBinView size using nbytes()
199+
let varbinview_size = src.as_ref().nbytes();
200+
201+
let string_bytes = src.buffers().iter().map(|b| b.len() as u64).sum::<u64>();
202+
203+
// Determine offset type size based on total string bytes
204+
// Arrow/Vortex uses i32 offsets if total size < u32::MAX, otherwise i64
205+
let offset_type_size = if string_bytes < u32::MAX as u64 { 4 } else { 8 };
206+
let offset_bytes = (src.len() as u64 + 1) * offset_type_size;
207+
208+
let varbin_size = string_bytes + offset_bytes;
209+
assert!(varbin_size > 0, "cannot be empty");
210+
211+
Ok(varbinview_size as f64 / varbin_size as f64)
197212
}
198213

199214
fn compress(

0 commit comments

Comments
 (0)