Skip to content

Commit 2d2e93b

Browse files
committed
feat: add IS NULL/NOT NULL predicate support for complex types
1 parent b7ba2e8 commit 2d2e93b

File tree

2 files changed

+222
-12
lines changed

2 files changed

+222
-12
lines changed

crates/iceberg/src/expr/accessor.rs

Lines changed: 202 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,54 @@ use std::sync::Arc;
1919

2020
use serde_derive::{Deserialize, Serialize};
2121

22-
use crate::spec::{Datum, Literal, PrimitiveType, Struct};
22+
use crate::spec::{Datum, Literal, PrimitiveType, Struct, Type};
2323
use crate::{Error, ErrorKind, Result};
2424

25+
/// The type of field that an accessor points to.
26+
/// Complex types (Struct, List, Map) can only be used for null checks,
27+
/// while Primitive types can be used for value extraction.
28+
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
29+
pub enum AccessorType {
30+
/// Primitive type - supports value extraction and null checks
31+
Primitive(PrimitiveType),
32+
/// Struct type - only supports null checks
33+
Struct,
34+
/// List type - only supports null checks
35+
List,
36+
/// Map type - only supports null checks
37+
Map,
38+
}
39+
40+
impl AccessorType {
41+
/// Returns the primitive type if this is a primitive accessor, otherwise None
42+
pub fn as_primitive(&self) -> Option<&PrimitiveType> {
43+
match self {
44+
AccessorType::Primitive(p) => Some(p),
45+
_ => None,
46+
}
47+
}
48+
49+
/// Returns true if this accessor type is complex (non-primitive)
50+
pub fn is_complex(&self) -> bool {
51+
!matches!(self, AccessorType::Primitive(_))
52+
}
53+
}
54+
55+
impl From<&Type> for AccessorType {
56+
fn from(ty: &Type) -> Self {
57+
match ty {
58+
Type::Primitive(p) => AccessorType::Primitive(p.clone()),
59+
Type::Struct(_) => AccessorType::Struct,
60+
Type::List(_) => AccessorType::List,
61+
Type::Map(_) => AccessorType::Map,
62+
}
63+
}
64+
}
65+
2566
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
2667
pub struct StructAccessor {
2768
position: usize,
28-
r#type: PrimitiveType,
69+
accessor_type: AccessorType,
2970
inner: Option<Box<StructAccessor>>,
3071
}
3172

@@ -35,15 +76,25 @@ impl StructAccessor {
3576
pub(crate) fn new(position: usize, r#type: PrimitiveType) -> Self {
3677
StructAccessor {
3778
position,
38-
r#type,
79+
accessor_type: AccessorType::Primitive(r#type),
80+
inner: None,
81+
}
82+
}
83+
84+
/// Create a new accessor for a complex type (struct, list, or map).
85+
/// Complex type accessors can only be used for null checks.
86+
pub(crate) fn new_complex(position: usize, ty: &Type) -> Self {
87+
StructAccessor {
88+
position,
89+
accessor_type: AccessorType::from(ty),
3990
inner: None,
4091
}
4192
}
4293

4394
pub(crate) fn wrap(position: usize, inner: Box<StructAccessor>) -> Self {
4495
StructAccessor {
4596
position,
46-
r#type: inner.r#type().clone(),
97+
accessor_type: inner.accessor_type().clone(),
4798
inner: Some(inner),
4899
}
49100
}
@@ -52,16 +103,55 @@ impl StructAccessor {
52103
self.position
53104
}
54105

106+
/// Returns the accessor type (primitive or complex)
107+
pub(crate) fn accessor_type(&self) -> &AccessorType {
108+
&self.accessor_type
109+
}
110+
111+
/// Returns the primitive type if this is a primitive accessor.
112+
/// For backward compatibility with code that expects a primitive type.
55113
pub(crate) fn r#type(&self) -> &PrimitiveType {
56-
&self.r#type
114+
match &self.accessor_type {
115+
AccessorType::Primitive(p) => p,
116+
// This should only be called for primitive accessors
117+
// Return a placeholder for complex types to avoid breaking existing code
118+
_ => &PrimitiveType::Boolean, // Placeholder, should not be used
119+
}
120+
}
121+
122+
/// Check if the value at this accessor's position is null.
123+
/// This works for both primitive and complex types.
124+
pub(crate) fn is_null(&self, container: &Struct) -> Result<bool> {
125+
match &self.inner {
126+
None => Ok(container[self.position].is_none()),
127+
Some(inner) => {
128+
if let Some(Literal::Struct(wrapped)) = &container[self.position] {
129+
inner.is_null(wrapped)
130+
} else if container[self.position].is_none() {
131+
Ok(true)
132+
} else {
133+
Err(Error::new(
134+
ErrorKind::Unexpected,
135+
"Nested accessor should only be wrapping a Struct",
136+
))
137+
}
138+
}
139+
}
57140
}
58141

59142
pub(crate) fn get<'a>(&'a self, container: &'a Struct) -> Result<Option<Datum>> {
60143
match &self.inner {
61144
None => match &container[self.position] {
62145
None => Ok(None),
63146
Some(Literal::Primitive(literal)) => {
64-
Ok(Some(Datum::new(self.r#type().clone(), literal.clone())))
147+
if let AccessorType::Primitive(prim_type) = &self.accessor_type {
148+
Ok(Some(Datum::new(prim_type.clone(), literal.clone())))
149+
} else {
150+
Err(Error::new(
151+
ErrorKind::Unexpected,
152+
"Cannot extract Datum from complex type accessor",
153+
))
154+
}
65155
}
66156
Some(_) => Err(Error::new(
67157
ErrorKind::Unexpected,
@@ -84,8 +174,11 @@ impl StructAccessor {
84174

85175
#[cfg(test)]
86176
mod tests {
87-
use crate::expr::accessor::StructAccessor;
88-
use crate::spec::{Datum, Literal, PrimitiveType, Struct};
177+
use std::sync::Arc;
178+
179+
use crate::expr::accessor::{AccessorType, StructAccessor};
180+
use crate::spec::datatypes::{ListType, MapType, NestedField, StructType};
181+
use crate::spec::{Datum, Literal, PrimitiveType, Struct, Type};
89182

90183
#[test]
91184
fn test_single_level_accessor() {
@@ -150,4 +243,105 @@ mod tests {
150243

151244
assert_eq!(accessor.get(&test_struct).unwrap(), None);
152245
}
246+
247+
#[test]
fn test_complex_type_accessor_struct() {
    // A struct type with a single required string field.
    let inner_field = NestedField::required(1, "inner", Type::Primitive(PrimitiveType::String));
    let struct_type = Type::Struct(StructType::new(vec![Arc::new(inner_field)]));
    let accessor = StructAccessor::new_complex(0, &struct_type);

    assert!(accessor.accessor_type().is_complex());
    assert!(matches!(accessor.accessor_type(), AccessorType::Struct));

    // A present struct value is not null.
    let nested_value = Struct::from_iter(vec![Some(Literal::string("test".to_string()))]);
    let populated = Struct::from_iter(vec![Some(Literal::Struct(nested_value))]);
    assert!(!accessor.is_null(&populated).unwrap());

    // An absent struct value is null.
    let missing = Struct::from_iter(vec![None]);
    assert!(accessor.is_null(&missing).unwrap());
}
268+
269+
#[test]
fn test_complex_type_accessor_list() {
    // A list of ints, accessed at position 1 of the container.
    let element = NestedField::list_element(1, Type::Primitive(PrimitiveType::Int), true);
    let list_type = Type::List(ListType::new(Arc::new(element)));
    let accessor = StructAccessor::new_complex(1, &list_type);

    assert!(accessor.accessor_type().is_complex());
    assert!(matches!(accessor.accessor_type(), AccessorType::List));

    // A present list value is not null.
    let list_value = Literal::List(vec![Some(Literal::int(1)), Some(Literal::int(2))]);
    let populated = Struct::from_iter(vec![Some(Literal::bool(false)), Some(list_value)]);
    assert!(!accessor.is_null(&populated).unwrap());

    // An absent list value is null.
    let missing = Struct::from_iter(vec![Some(Literal::bool(false)), None]);
    assert!(accessor.is_null(&missing).unwrap());
}
292+
293+
#[test]
fn test_complex_type_accessor_map() {
    // A string -> int map type.
    let key_field = NestedField::map_key_element(1, Type::Primitive(PrimitiveType::String));
    let value_field =
        NestedField::map_value_element(2, Type::Primitive(PrimitiveType::Int), true);
    let map_type = Type::Map(MapType::new(Arc::new(key_field), Arc::new(value_field)));
    let accessor = StructAccessor::new_complex(0, &map_type);

    assert!(accessor.accessor_type().is_complex());
    assert!(matches!(accessor.accessor_type(), AccessorType::Map));

    // An absent map value is null.
    // NOTE(review): only the null case is exercised here; a non-null map case is not covered.
    let missing = Struct::from_iter(vec![None]);
    assert!(accessor.is_null(&missing).unwrap());
}
315+
316+
#[test]
fn test_primitive_is_null() {
    let accessor = StructAccessor::new(0, PrimitiveType::Int);

    // A present primitive value is not null.
    let populated = Struct::from_iter(vec![Some(Literal::int(42))]);
    assert!(!accessor.is_null(&populated).unwrap());

    // An absent primitive value is null.
    let missing = Struct::from_iter(vec![None]);
    assert!(accessor.is_null(&missing).unwrap());
}
328+
329+
#[test]
fn test_accessor_type_as_primitive() {
    // The primitive variant exposes its type and is not complex.
    let primitive = AccessorType::Primitive(PrimitiveType::Int);
    assert_eq!(primitive.as_primitive(), Some(&PrimitiveType::Int));
    assert!(!primitive.is_complex());

    // Every non-primitive variant has no primitive type and is complex.
    for complex in [AccessorType::Struct, AccessorType::List, AccessorType::Map] {
        assert_eq!(complex.as_primitive(), None);
        assert!(complex.is_complex());
    }
}
153347
}

crates/iceberg/src/spec/schema/mod.rs

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,14 +196,22 @@ impl SchemaBuilder {
196196
}
197197

198198
Type::Struct(nested) => {
199+
// add an accessor for the struct itself (for null checks)
200+
let struct_accessor =
201+
Arc::new(StructAccessor::new_complex(pos, field.field_type.as_ref()));
202+
map.insert(field.id, struct_accessor);
203+
199204
// add accessors for nested fields
200205
for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
201206
let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor));
202207
map.insert(field_id, new_accessor.clone());
203208
}
204209
}
205-
_ => {
206-
// Accessors don't get built for Map or List types
210+
Type::List(_) | Type::Map(_) => {
211+
// add an accessor for complex types (for null checks)
212+
let accessor =
213+
Arc::new(StructAccessor::new_complex(pos, field.field_type.as_ref()));
214+
map.insert(field.id, accessor);
207215
}
208216
}
209217
}
@@ -220,6 +228,11 @@ impl SchemaBuilder {
220228
results.push((field.id, accessor));
221229
}
222230
Type::Struct(nested) => {
231+
// add an accessor for the struct itself (for null checks)
232+
let struct_accessor =
233+
Box::new(StructAccessor::new_complex(pos, field.field_type.as_ref()));
234+
results.push((field.id, struct_accessor));
235+
223236
let nested_accessors = Self::build_accessors_nested(nested.fields());
224237

225238
let wrapped_nested_accessors =
@@ -230,8 +243,11 @@ impl SchemaBuilder {
230243

231244
results.extend(wrapped_nested_accessors);
232245
}
233-
_ => {
234-
// Accessors don't get built for Map or List types
246+
Type::List(_) | Type::Map(_) => {
247+
// add an accessor for complex types (for null checks)
248+
let accessor =
249+
Box::new(StructAccessor::new_complex(pos, field.field_type.as_ref()));
250+
results.push((field.id, accessor));
235251
}
236252
}
237253
}

0 commit comments

Comments
 (0)