Skip to content

Commit 3487a1b

Browse files
fix[duckdb]: remove validity mask from vector export (#5739)
Signed-off-by: Joe Isaacs <[email protected]>
1 parent 47d6ba7 commit 3487a1b

File tree

5 files changed

+80
-14
lines changed

5 files changed

+80
-14
lines changed

vortex-duckdb/src/exporter/dict.rs

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ use vortex::array::arrays::ConstantArray;
1414
use vortex::array::arrays::ConstantVTable;
1515
use vortex::array::arrays::DictArray;
1616
use vortex::array::arrays::PrimitiveArray;
17+
use vortex::array::builtins::ArrayBuiltins;
18+
use vortex::array::mask::MaskExecutor;
1719
use vortex::array::vectors::VectorIntoArray;
1820
use vortex::compute;
1921
use vortex::compute2::take::Take;
@@ -186,7 +188,7 @@ pub(crate) fn new_vector_exporter_with_flatten(
186188
if let Some(constant) = values.as_opt::<ConstantVTable>() {
187189
return constant::new_exporter_with_mask(
188190
&ConstantArray::new(constant.scalar().clone(), array.codes().len()),
189-
array.codes().validity_mask(),
191+
array.codes().is_null()?.not()?.execute_mask(session)?,
190192
cache,
191193
);
192194
}
@@ -299,19 +301,22 @@ impl<I: IntegerPType + AsPrimitive<u32>> ColumnExporter for DictVectorExporter<I
299301

300302
#[cfg(test)]
301303
mod tests {
304+
use vortex::VortexSessionDefault;
302305
use vortex::array::IntoArray;
303306
use vortex::array::arrays::ConstantArray;
304307
use vortex::array::arrays::DictArray;
305308
use vortex::array::arrays::PrimitiveArray;
306309
use vortex::buffer::Buffer;
307310
use vortex::error::VortexResult;
311+
use vortex::session::VortexSession;
308312

309313
use crate::cpp;
310314
use crate::duckdb::DataChunk;
311315
use crate::duckdb::LogicalType;
312316
use crate::exporter::ColumnExporter;
313317
use crate::exporter::ConversionCache;
314318
use crate::exporter::dict::new_exporter_with_flatten;
319+
use crate::exporter::dict::new_vector_exporter_with_flatten;
315320
use crate::exporter::new_array_exporter;
316321

317322
pub(crate) fn new_exporter(
@@ -344,6 +349,54 @@ mod tests {
344349
);
345350
}
346351

352+
#[test]
353+
fn test_constant_dict_vector() {
354+
let arr = DictArray::new(
355+
PrimitiveArray::from_option_iter([None, Some(0u32)]).into_array(),
356+
ConstantArray::new(10, 1).into_array(),
357+
);
358+
359+
let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]);
360+
361+
let session = VortexSession::default();
362+
new_vector_exporter_with_flatten(&arr, &ConversionCache::default(), &session, false)
363+
.unwrap()
364+
.export(0, 2, &mut chunk.get_vector(0))
365+
.unwrap();
366+
chunk.set_len(2);
367+
368+
assert_eq!(
369+
format!("{}", String::try_from(&chunk).unwrap()),
370+
r#"Chunk - [1 Columns]
371+
- FLAT INTEGER: 2 = [ NULL, 10]
372+
"#
373+
);
374+
}
375+
376+
#[test]
377+
fn test_constant_dict_vector_null() {
378+
let arr = DictArray::new(
379+
PrimitiveArray::from_option_iter([None::<u32>, None]).into_array(),
380+
ConstantArray::new(10, 1).into_array(),
381+
);
382+
383+
let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]);
384+
385+
let session = VortexSession::default();
386+
new_vector_exporter_with_flatten(&arr, &ConversionCache::default(), &session, false)
387+
.unwrap()
388+
.export(0, 2, &mut chunk.get_vector(0))
389+
.unwrap();
390+
chunk.set_len(2);
391+
392+
assert_eq!(
393+
format!("{}", String::try_from(&chunk).unwrap()),
394+
r#"Chunk - [1 Columns]
395+
- CONSTANT INTEGER: 2 = [ NULL]
396+
"#
397+
);
398+
}
399+
347400
#[test]
348401
fn test_nullable_dict() {
349402
let arr = DictArray::new(

vortex-duckdb/src/exporter/fixed_size_list.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
use vortex::array::ArrayRef;
1212
use vortex::array::ToCanonical;
1313
use vortex::array::arrays::FixedSizeListArray;
14+
use vortex::array::validity::Validity;
15+
use vortex::array::vtable::ValidityHelper;
1416
use vortex::error::VortexResult;
1517
use vortex::mask::Mask;
1618
use vortex::session::VortexSession;
@@ -103,14 +105,14 @@ pub(crate) fn new_vector_exporter(
103105

104106
let ltype: LogicalType = array.dtype().try_into()?;
105107

106-
let mask = array.validity_mask();
108+
let mask = array.validity();
107109

108-
if let Mask::AllFalse(len) = mask {
109-
return Ok(all_invalid::new_exporter(len, &ltype));
110+
if matches!(mask, Validity::AllInvalid) {
111+
return Ok(all_invalid::new_exporter(array.len(), &ltype));
110112
}
111113

112114
Ok(Box::new(FixedSizeListExporter {
113-
validity: mask,
115+
validity: mask.to_mask(array.len()),
114116
elements_exporter,
115117
list_size: array.list_size(),
116118
}))

vortex-duckdb/src/exporter/list.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use vortex::array::ToCanonical;
1010
use vortex::array::VectorExecutor;
1111
use vortex::array::arrays::ListViewArray;
1212
use vortex::array::arrays::PrimitiveArray;
13+
use vortex::array::vtable::ValidityHelper;
1314
use vortex::dtype::IntegerPType;
1415
use vortex::dtype::PTypeDowncastExt;
1516
use vortex::dtype::match_each_integer_ptype;
@@ -210,7 +211,7 @@ pub(crate) fn new_vector_exporter(
210211
let boxed = match_each_integer_ptype!(offsets.ptype(), |O| {
211212
match_each_integer_ptype!(sizes.ptype(), |S| {
212213
Box::new(ListVectorExporter {
213-
validity: array.validity_mask(),
214+
validity: array.validity().to_mask(array.len()),
214215
duckdb_elements: shared_elements,
215216
offsets: offsets.downcast::<O>(),
216217
sizes: sizes.downcast::<O>(),

vortex-duckdb/src/exporter/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ use vortex::array::arrays::DictVTable;
3434
use vortex::array::arrays::StructArray;
3535
use vortex::array::arrays::TemporalArray;
3636
use vortex::array::iter::ArrayIterator;
37+
use vortex::array::vtable::ValidityHelper;
3738
use vortex::dtype::DType;
3839
use vortex::dtype::datetime::is_temporal_ext_type;
3940
use vortex::encodings::runend::RunEndVTable;
@@ -109,6 +110,7 @@ impl ArrayExporter {
109110
cache: &ConversionCache,
110111
session: &VortexSession,
111112
) -> VortexResult<Self> {
113+
assert!(array.validity().all_valid(array.len()));
112114
let fields = array
113115
.fields()
114116
.iter()

vortex-duckdb/src/exporter/struct_.rs

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use vortex::array::IntoArray;
54
use vortex::array::arrays::StructArray;
5+
use vortex::array::optimizer::ArrayOptimizer;
6+
use vortex::array::vtable::ValidityHelper;
67
use vortex::compute::mask;
78
use vortex::error::VortexResult;
9+
use vortex::mask::Mask;
810
use vortex::session::VortexSession;
911

12+
use crate::LogicalType;
1013
use crate::duckdb::Vector;
1114
use crate::exporter::ColumnExporter;
1215
use crate::exporter::ConversionCache;
16+
use crate::exporter::all_invalid;
1317
use crate::exporter::new_array_exporter;
1418
use crate::exporter::new_vector_array_exporter;
1519
use crate::exporter::validity;
@@ -60,19 +64,23 @@ pub(crate) fn new_vector_exporter(
6064
cache: &ConversionCache,
6165
session: &VortexSession,
6266
) -> VortexResult<Box<dyn ColumnExporter>> {
63-
let validity = array.validity_mask();
64-
// DuckDB requires that the validity of the child be a subset of the parent struct so we mask out children with
65-
// parents nullability
66-
let validity_for_mask = array.dtype().is_nullable().then(|| !&validity);
67+
let validity = array.validity().to_mask(array.len());
68+
69+
if validity.all_false() {
70+
return Ok(all_invalid::new_exporter(
71+
array.len(),
72+
&LogicalType::try_from(array.dtype())?,
73+
));
74+
}
6775

6876
let children = array
6977
.fields()
7078
.iter()
7179
.map(|child| {
72-
if let Some(mv) = validity_for_mask.as_ref() {
73-
new_vector_array_exporter(mask(child, mv)?.into_array(), cache, session)
80+
if matches!(validity, Mask::Values(_)) {
81+
new_vector_array_exporter(mask(child, &validity)?.optimize()?, cache, session)
7482
} else {
75-
new_vector_array_exporter(child.to_array(), cache, session)
83+
new_vector_array_exporter(child.clone(), cache, session)
7684
}
7785
})
7886
.collect::<VortexResult<Vec<_>>>()?;

0 commit comments

Comments
 (0)