Skip to content

Commit 4d2291e

Browse files
feat[layout]: dict layout known code nullability (#5063)
This is not a breaking change. This can allow expr pushdown on the values without having to check the codes validity. Mirrors: #3736 --------- Signed-off-by: Joe Isaacs <[email protected]>
1 parent c118348 commit 4d2291e

File tree

2 files changed

+21
-13
lines changed

2 files changed

+21
-13
lines changed

encodings/dict/src/serde.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ impl SerdeVTable<DictVTable> for DictVTable {
5454
children.len()
5555
)
5656
}
57-
let codes_nullable: Nullability = metadata
57+
let codes_nullable = metadata
5858
.is_nullable_codes
59-
// The old behaviour of (without `is_nullable_codes` metadata) used the nullability
60-
// of the values (and whole array).
61-
.unwrap_or_else(|| dtype.is_nullable())
62-
.into();
59+
.map(Nullability::from)
60+
// If no `is_nullable_codes` metadata use the nullability of the values
61+
// (and whole array) as before.
62+
.unwrap_or_else(|| dtype.nullability());
6363
let codes_dtype = DType::Primitive(metadata.codes_ptype(), codes_nullable);
6464
let codes = children.get(0, &codes_dtype, len)?;
6565
let values = children.get(1, dtype, metadata.values_len as usize)?;

vortex-layout/src/layouts/dict/mod.rs

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::sync::Arc;
88

99
use reader::DictReader;
1010
use vortex_array::{ArrayContext, DeserializeMetadata, ProstMetadata};
11-
use vortex_dtype::{DType, PType};
11+
use vortex_dtype::{DType, Nullability, PType};
1212
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
1313

1414
use crate::children::LayoutChildren;
@@ -41,9 +41,10 @@ impl VTable for DictVTable {
4141
}
4242

4343
fn metadata(layout: &Self::Layout) -> Self::Metadata {
44-
ProstMetadata(DictLayoutMetadata::new(
45-
PType::try_from(layout.codes.dtype()).vortex_expect("ptype"),
46-
))
44+
let mut metadata =
45+
DictLayoutMetadata::new(PType::try_from(layout.codes.dtype()).vortex_expect("ptype"));
46+
metadata.is_nullable_codes = Some(layout.codes.dtype().is_nullable());
47+
ProstMetadata(metadata)
4748
}
4849

4950
fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
@@ -92,10 +93,14 @@ impl VTable for DictVTable {
9293
_ctx: ArrayContext,
9394
) -> VortexResult<Self::Layout> {
9495
let values = children.child(0, dtype)?;
95-
let codes = children.child(
96-
1,
97-
&DType::Primitive(metadata.codes_ptype(), dtype.nullability()),
98-
)?;
96+
let codes_nullable = metadata
97+
.is_nullable_codes
98+
.map(Nullability::from)
99+
// The old behaviour (without `is_nullable_codes` metadata) used the nullability
100+
// of the values (and whole array).
101+
// see [`SerdeVTable<DictVTable>::build`].
102+
.unwrap_or_else(|| dtype.nullability());
103+
let codes = children.child(1, &DType::Primitive(metadata.codes_ptype(), codes_nullable))?;
99104
Ok(DictLayout { values, codes })
100105
}
101106
}
@@ -120,6 +125,9 @@ pub struct DictLayoutMetadata {
120125
#[prost(enumeration = "PType", tag = "1")]
121126
// i32 is required for proto, use the generated getter to read this field.
122127
codes_ptype: i32,
128+
// nullable codes are optional since they were added after stabilisation
129+
#[prost(optional, bool, tag = "2")]
130+
is_nullable_codes: Option<bool>,
123131
}
124132

125133
impl DictLayoutMetadata {

0 commit comments

Comments
 (0)