Skip to content

Commit 0cb9556

Browse files
committed
feat[duckdb]: remove validity mask from vector export
Signed-off-by: Joe Isaacs <[email protected]>
1 parent 47d6ba7 commit 0cb9556

File tree

5 files changed

+85
-14
lines changed

5 files changed

+85
-14
lines changed

vortex-duckdb/src/exporter/dict.rs

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,17 @@ use vortex::array::arrays::ConstantArray;
1414
use vortex::array::arrays::ConstantVTable;
1515
use vortex::array::arrays::DictArray;
1616
use vortex::array::arrays::PrimitiveArray;
17+
use vortex::array::mask::MaskExecutor;
1718
use vortex::array::vectors::VectorIntoArray;
1819
use vortex::compute;
1920
use vortex::compute2::take::Take;
2021
use vortex::dtype::IntegerPType;
2122
use vortex::dtype::PTypeDowncastExt;
2223
use vortex::dtype::match_each_integer_ptype;
2324
use vortex::error::VortexResult;
25+
use vortex::expr::is_null;
26+
use vortex::expr::not;
27+
use vortex::expr::root;
2428
use vortex::mask::Mask;
2529
use vortex::session::VortexSession;
2630
use vortex_vector::VectorOps;
@@ -186,7 +190,10 @@ pub(crate) fn new_vector_exporter_with_flatten(
186190
if let Some(constant) = values.as_opt::<ConstantVTable>() {
187191
return constant::new_exporter_with_mask(
188192
&ConstantArray::new(constant.scalar().clone(), array.codes().len()),
189-
array.codes().validity_mask(),
193+
array
194+
.codes()
195+
.apply(&not(is_null(root())))?
196+
.execute_mask(session)?,
190197
cache,
191198
);
192199
}
@@ -299,19 +306,22 @@ impl<I: IntegerPType + AsPrimitive<u32>> ColumnExporter for DictVectorExporter<I
299306

300307
#[cfg(test)]
301308
mod tests {
309+
use vortex::VortexSessionDefault;
302310
use vortex::array::IntoArray;
303311
use vortex::array::arrays::ConstantArray;
304312
use vortex::array::arrays::DictArray;
305313
use vortex::array::arrays::PrimitiveArray;
306314
use vortex::buffer::Buffer;
307315
use vortex::error::VortexResult;
316+
use vortex::session::VortexSession;
308317

309318
use crate::cpp;
310319
use crate::duckdb::DataChunk;
311320
use crate::duckdb::LogicalType;
312321
use crate::exporter::ColumnExporter;
313322
use crate::exporter::ConversionCache;
314323
use crate::exporter::dict::new_exporter_with_flatten;
324+
use crate::exporter::dict::new_vector_exporter_with_flatten;
315325
use crate::exporter::new_array_exporter;
316326

317327
pub(crate) fn new_exporter(
@@ -344,6 +354,54 @@ mod tests {
344354
);
345355
}
346356

357+
// Exports a dictionary array whose values are a `ConstantArray` and whose codes
// contain a null, going through `new_vector_exporter_with_flatten`, and checks the
// rendered DuckDB chunk.
// NOTE(review): presumably this exercises the `ConstantVTable` branch that builds its
// mask from `not(is_null(root()))` over the codes — confirm against the function body.
#[test]
358+
fn test_constant_dict_vector() {
359+
// Codes: first row is null, second row refers to dictionary entry 0.
let arr = DictArray::new(
360+
PrimitiveArray::from_option_iter([None, Some(0u32)]).into_array(),
361+
// Values: a constant array of length 1 holding the scalar 10.
ConstantArray::new(10, 1).into_array(),
362+
);
363+
364+
// Single-column INTEGER chunk that receives the exported rows.
let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]);
365+
366+
let session = VortexSession::default();
367+
new_vector_exporter_with_flatten(&arr, &ConversionCache::default(), &session, false)
368+
.unwrap()
369+
.export(0, 2, &mut chunk.get_vector(0))
370+
.unwrap();
371+
chunk.set_len(2);
372+
373+
// Expected rendering: a FLAT vector with the null row preserved and the constant 10.
assert_eq!(
374+
format!("{}", String::try_from(&chunk).unwrap()),
375+
r#"Chunk - [1 Columns]
376+
- FLAT INTEGER: 2 = [ NULL, 10]
377+
"#
378+
);
379+
}
380+
381+
// Same setup as the constant-values dictionary export, but every code is null.
// The rendered chunk is expected to be a CONSTANT vector holding NULL rather than a
// FLAT vector.
#[test]
382+
fn test_constant_dict_vector_null() {
383+
// Codes: both rows are null (typed as `u32` via the turbofish on the first `None`).
let arr = DictArray::new(
384+
PrimitiveArray::from_option_iter([None::<u32>, None]).into_array(),
385+
// Values: a constant array of length 1 holding the scalar 10; no code refers to it.
ConstantArray::new(10, 1).into_array(),
386+
);
387+
388+
// Single-column INTEGER chunk that receives the exported rows.
let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]);
389+
390+
let session = VortexSession::default();
391+
new_vector_exporter_with_flatten(&arr, &ConversionCache::default(), &session, false)
392+
.unwrap()
393+
.export(0, 2, &mut chunk.get_vector(0))
394+
.unwrap();
395+
chunk.set_len(2);
396+
397+
// Expected rendering: a CONSTANT vector whose single value is NULL.
assert_eq!(
398+
format!("{}", String::try_from(&chunk).unwrap()),
399+
r#"Chunk - [1 Columns]
400+
- CONSTANT INTEGER: 2 = [ NULL]
401+
"#
402+
);
403+
}
404+
347405
#[test]
348406
fn test_nullable_dict() {
349407
let arr = DictArray::new(

vortex-duckdb/src/exporter/fixed_size_list.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
use vortex::array::ArrayRef;
1212
use vortex::array::ToCanonical;
1313
use vortex::array::arrays::FixedSizeListArray;
14+
use vortex::array::validity::Validity;
15+
use vortex::array::vtable::ValidityHelper;
1416
use vortex::error::VortexResult;
1517
use vortex::mask::Mask;
1618
use vortex::session::VortexSession;
@@ -103,14 +105,14 @@ pub(crate) fn new_vector_exporter(
103105

104106
let ltype: LogicalType = array.dtype().try_into()?;
105107

106-
let mask = array.validity_mask();
108+
let mask = array.validity();
107109

108-
if let Mask::AllFalse(len) = mask {
109-
return Ok(all_invalid::new_exporter(len, &ltype));
110+
if let Validity::AllInvalid = mask {
111+
return Ok(all_invalid::new_exporter(array.len(), &ltype));
110112
}
111113

112114
Ok(Box::new(FixedSizeListExporter {
113-
validity: mask,
115+
validity: mask.to_mask(array.len()),
114116
elements_exporter,
115117
list_size: array.list_size(),
116118
}))

vortex-duckdb/src/exporter/list.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use vortex::array::ToCanonical;
1010
use vortex::array::VectorExecutor;
1111
use vortex::array::arrays::ListViewArray;
1212
use vortex::array::arrays::PrimitiveArray;
13+
use vortex::array::vtable::ValidityHelper;
1314
use vortex::dtype::IntegerPType;
1415
use vortex::dtype::PTypeDowncastExt;
1516
use vortex::dtype::match_each_integer_ptype;
@@ -210,7 +211,7 @@ pub(crate) fn new_vector_exporter(
210211
let boxed = match_each_integer_ptype!(offsets.ptype(), |O| {
211212
match_each_integer_ptype!(sizes.ptype(), |S| {
212213
Box::new(ListVectorExporter {
213-
validity: array.validity_mask(),
214+
validity: array.validity().to_mask(array.len()),
214215
duckdb_elements: shared_elements,
215216
offsets: offsets.downcast::<O>(),
216217
sizes: sizes.downcast::<O>(),

vortex-duckdb/src/exporter/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ use vortex::array::arrays::DictVTable;
3434
use vortex::array::arrays::StructArray;
3535
use vortex::array::arrays::TemporalArray;
3636
use vortex::array::iter::ArrayIterator;
37+
use vortex::array::vtable::ValidityHelper;
3738
use vortex::dtype::DType;
3839
use vortex::dtype::datetime::is_temporal_ext_type;
3940
use vortex::encodings::runend::RunEndVTable;
@@ -109,6 +110,7 @@ impl ArrayExporter {
109110
cache: &ConversionCache,
110111
session: &VortexSession,
111112
) -> VortexResult<Self> {
113+
assert!(array.validity().all_valid(array.len()));
112114
let fields = array
113115
.fields()
114116
.iter()

vortex-duckdb/src/exporter/struct_.rs

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use vortex::array::IntoArray;
54
use vortex::array::arrays::StructArray;
5+
use vortex::array::optimizer::ArrayOptimizer;
6+
use vortex::array::vtable::ValidityHelper;
67
use vortex::compute::mask;
78
use vortex::error::VortexResult;
9+
use vortex::mask::Mask;
810
use vortex::session::VortexSession;
911

12+
use crate::LogicalType;
1013
use crate::duckdb::Vector;
1114
use crate::exporter::ColumnExporter;
1215
use crate::exporter::ConversionCache;
16+
use crate::exporter::all_invalid;
1317
use crate::exporter::new_array_exporter;
1418
use crate::exporter::new_vector_array_exporter;
1519
use crate::exporter::validity;
@@ -60,19 +64,23 @@ pub(crate) fn new_vector_exporter(
6064
cache: &ConversionCache,
6165
session: &VortexSession,
6266
) -> VortexResult<Box<dyn ColumnExporter>> {
63-
let validity = array.validity_mask();
64-
// DuckDB requires that the validity of the child be a subset of the parent struct so we mask out children with
65-
// the parent's nullability
66-
let validity_for_mask = array.dtype().is_nullable().then(|| !&validity);
67+
let validity = array.validity().to_mask(array.len());
68+
69+
if validity.all_false() {
70+
return Ok(all_invalid::new_exporter(
71+
array.len(),
72+
&LogicalType::try_from(array.dtype())?,
73+
));
74+
}
6775

6876
let children = array
6977
.fields()
7078
.iter()
7179
.map(|child| {
72-
if let Some(mv) = validity_for_mask.as_ref() {
73-
new_vector_array_exporter(mask(child, mv)?.into_array(), cache, session)
80+
if matches!(validity, Mask::Values(_)) {
81+
new_vector_array_exporter(mask(child, &validity)?.optimize()?, cache, session)
7482
} else {
75-
new_vector_array_exporter(child.to_array(), cache, session)
83+
new_vector_array_exporter(child.clone(), cache, session)
7684
}
7785
})
7886
.collect::<VortexResult<Vec<_>>>()?;

0 commit comments

Comments
 (0)