Skip to content

Commit 8f7b707

Browse files
authored
fix: yo dawg, I heard you like inlining (#5363)
i heard you like inlining. so i `#[inlined]` your `Inlined` so you can get inlining for your `Inlined`s --------- Signed-off-by: Andrew Duffy <[email protected]>
1 parent f48976e commit 8f7b707

File tree

3 files changed

+84
-25
lines changed

3 files changed

+84
-25
lines changed

encodings/zstd/src/array.rs

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,10 +454,32 @@ impl ZstdArray {
454454

455455
let decompressed = decompressed.freeze();
456456
// Last, we slice the exact values requested out of the decompressed data.
457-
let slice_validity = self
457+
let mut slice_validity = self
458458
.unsliced_validity
459459
.slice(self.slice_start..self.slice_stop);
460460

461+
// NOTE: this block handles setting the output type when the validity and DType disagree.
462+
//
463+
// ZSTD is a compact block compressor, meaning that null values are not stored inline in
464+
// the data frames. A ZSTD Array that was initialized must always hold onto its full
465+
// validity bitmap, even if sliced to only include non-null values.
466+
//
467+
// We ensure that the validity of the decompressed array ALWAYS matches the validity
468+
// implied by the DType.
469+
if !self.dtype().is_nullable() && slice_validity != Validity::NonNullable {
470+
assert!(
471+
slice_validity.all_valid(slice_n_rows),
472+
"ZSTD array expects to be non-nullable but there are nulls after decompression"
473+
);
474+
475+
slice_validity = Validity::NonNullable;
476+
} else if self.dtype.is_nullable() && slice_validity == Validity::NonNullable {
477+
slice_validity = Validity::AllValid;
478+
}
479+
//
480+
// END OF IMPORTANT BLOCK
481+
//
482+
461483
match &self.dtype {
462484
DType::Primitive(..) => {
463485
let slice_values_buffer = decompressed.slice(
@@ -531,6 +553,21 @@ impl ZstdArray {
531553
}
532554

533555
pub(crate) fn _slice(&self, start: usize, stop: usize) -> ZstdArray {
556+
let new_start = self.slice_start + start;
557+
let new_stop = self.slice_start + stop;
558+
559+
assert!(
560+
new_start <= self.slice_stop,
561+
"new slice start {new_start} exceeds end {}",
562+
self.slice_stop
563+
);
564+
565+
assert!(
566+
new_stop <= self.slice_stop,
567+
"new slice stop {new_stop} exceeds end {}",
568+
self.slice_stop
569+
);
570+
534571
ZstdArray {
535572
slice_start: self.slice_start + start,
536573
slice_stop: self.slice_start + stop,

encodings/zstd/src/compute/cast.rs

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,46 +2,67 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use vortex_array::compute::{CastKernel, CastKernelAdapter};
5-
use vortex_array::{ArrayRef, IntoArray, register_kernel};
6-
use vortex_dtype::DType;
5+
use vortex_array::{ArrayRef, register_kernel};
6+
use vortex_dtype::{DType, Nullability};
77
use vortex_error::VortexResult;
88

99
use crate::{ZstdArray, ZstdVTable};
1010

1111
impl CastKernel for ZstdVTable {
1212
fn cast(&self, array: &ZstdArray, dtype: &DType) -> VortexResult<Option<ArrayRef>> {
13-
if !dtype.is_nullable() || !array.all_valid() {
14-
// We cannot cast to non-nullable since the validity containing nulls is used to decode
15-
// the ZSTD array, this would require rewriting tables.
13+
if !dtype.eq_ignore_nullability(array.dtype()) {
14+
// Type changes can't be handled in ZSTD, need to decode and tweak.
15+
// TODO(aduffy): handle trivial conversions like Binary -> UTF8, integer widening, etc.
1616
return Ok(None);
1717
}
18-
// ZstdArray is a general-purpose compression encoding using Zstandard compression.
19-
// It can handle nullability changes without decompression by updating the validity
20-
// bitmap, but type changes require decompression since the compressed data is
21-
// type-specific and Zstd operates on raw bytes.
22-
if array.dtype().eq_ignore_nullability(dtype) {
23-
// Create a new validity with the target nullability
24-
let new_validity = array
25-
.unsliced_validity
26-
.clone()
27-
.cast_nullability(dtype.nullability(), array.len())?;
28-
29-
return Ok(Some(
18+
19+
let src_nullability = array.dtype().nullability();
20+
let target_nullability = dtype.nullability();
21+
22+
match (src_nullability, target_nullability) {
23+
// Same type case. This should be handled in the layer above but for
24+
// completeness of the match arms we also handle it here.
25+
(Nullability::Nullable, Nullability::Nullable)
26+
| (Nullability::NonNullable, Nullability::NonNullable) => Ok(Some(array.to_array())),
27+
(Nullability::NonNullable, Nullability::Nullable) => Ok(Some(
28+
// nonnull => null, trivial cast by altering the validity
3029
ZstdArray::new(
3130
array.dictionary.clone(),
3231
array.frames.clone(),
3332
dtype.clone(),
3433
array.metadata.clone(),
3534
array.unsliced_n_rows(),
36-
new_validity,
35+
array.unsliced_validity.clone(),
3736
)
38-
._slice(array.slice_start(), array.slice_stop())
39-
.into_array(),
40-
));
37+
.slice(array.slice_start()..array.slice_stop()),
38+
)),
39+
(Nullability::Nullable, Nullability::NonNullable) => {
40+
// null => non-null works if there are no nulls in the sliced range
41+
let sliced_len = array.slice_stop() - array.slice_start();
42+
let has_nulls = !array
43+
.unsliced_validity
44+
.slice(array.slice_start()..array.slice_stop())
45+
.all_valid(sliced_len);
46+
47+
// We don't attempt to handle casting when there are nulls.
48+
if has_nulls {
49+
return Ok(None);
50+
}
51+
52+
// If there are no nulls, the cast is trivial
53+
Ok(Some(
54+
ZstdArray::new(
55+
array.dictionary.clone(),
56+
array.frames.clone(),
57+
dtype.clone(),
58+
array.metadata.clone(),
59+
array.unsliced_n_rows(),
60+
array.unsliced_validity.clone(),
61+
)
62+
.slice(array.slice_start()..array.slice_stop()),
63+
))
64+
}
4165
}
42-
43-
// For other casts (e.g., type changes), decode to canonical and let the underlying array handle it
44-
Ok(None)
4566
}
4667
}
4768

vortex-vector/src/binaryview/view.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub struct Inlined {
4646

4747
impl Inlined {
4848
/// Creates a new inlined representation from the provided value of constant size.
49+
#[inline]
4950
fn new<const N: usize>(value: &[u8]) -> Self {
5051
debug_assert_eq!(value.len(), N);
5152
let mut inlined = Self {

0 commit comments

Comments
 (0)