Skip to content

Commit 7150be2

Browse files
Use canonical in duckdb instead of directly to arrow (#2956)
1 parent: 0ef8c03 · commit: 7150be2

File tree

1 file changed

+23 -5 lines changed
  • vortex-duckdb/src/convert/array

1 file changed

+23 -5 lines changed

vortex-duckdb/src/convert/array/mod.rs

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ use vortex_array::compute::{take, to_arrow_preferred};
1717
use vortex_array::validity::Validity;
1818
use vortex_array::variants::PrimitiveArrayTrait;
1919
use vortex_array::vtable::EncodingVTable;
20-
use vortex_array::{Array, ArrayRef, ArrayStatistics, ToCanonical};
20+
use vortex_array::{Array, ArrayRef, ArrayStatistics, IntoArray, ToCanonical};
2121
use vortex_dict::{DictArray, DictEncoding};
2222
use vortex_dtype::{NativePType, match_each_integer_ptype};
2323
use vortex_error::{VortexExpect, VortexResult, vortex_err};
@@ -57,36 +57,54 @@ pub fn to_duckdb(
5757
chunk: &mut dyn WritableVector,
5858
cache: &mut ConversionCache,
5959
) -> VortexResult<()> {
60+
if try_to_duckdb(array, chunk, cache)?.is_some() {
61+
return Ok(());
62+
};
63+
let canonical_array = array.to_canonical()?.into_array();
64+
if try_to_duckdb(&canonical_array, chunk, cache)?.is_some() {
65+
return Ok(());
66+
};
67+
to_arrow_preferred(&canonical_array)?.to_duckdb(chunk, cache)
68+
}
69+
70+
fn try_to_duckdb(
71+
array: &ArrayRef,
72+
chunk: &mut dyn WritableVector,
73+
cache: &mut ConversionCache,
74+
) -> VortexResult<Option<()>> {
6075
if let Some(constant) = array.as_constant() {
6176
let value = constant.try_to_duckdb_scalar()?;
6277
chunk.flat_vector().assign_to_constant(&value);
63-
Ok(())
78+
Ok(Some(()))
6479
} else if array.is_encoding(ChunkedEncoding.id()) {
6580
array
6681
.as_any()
6782
.downcast_ref::<ChunkedArray>()
6883
.vortex_expect("chunk checked")
6984
.to_duckdb(chunk, cache)
85+
.map(Some)
7086
} else if array.is_encoding(VarBinViewEncoding.id()) {
7187
array
7288
.as_any()
7389
.downcast_ref::<VarBinViewArray>()
7490
.vortex_expect("varbinview id checked")
7591
.to_duckdb(chunk, cache)
92+
.map(Some)
7693
} else if array.is_encoding(FSSTEncoding.id()) {
7794
let arr = array
7895
.as_any()
7996
.downcast_ref::<FSSTArray>()
8097
.vortex_expect("FSSTArray id checked");
81-
arr.to_varbinview()?.to_duckdb(chunk, cache)
98+
arr.to_varbinview()?.to_duckdb(chunk, cache).map(Some)
8299
} else if array.is_encoding(DictEncoding.id()) {
83100
array
84101
.as_any()
85102
.downcast_ref::<DictArray>()
86103
.vortex_expect("dict id checked")
87104
.to_duckdb(chunk, cache)
105+
.map(Some)
88106
} else {
89-
to_arrow_preferred(array)?.to_duckdb(chunk, cache)
107+
Ok(None)
90108
}
91109
}
92110

@@ -98,7 +116,7 @@ impl ToDuckDB for ChunkedArray {
98116
) -> VortexResult<()> {
99117
// TODO(joe): support multi-chunk arrays without canonical.
100118
if self.chunks().len() > 1 {
101-
to_arrow_preferred(self)?.to_duckdb(chunk, cache)
119+
to_duckdb(&self.to_canonical()?.into_array(), chunk, cache)
102120
} else {
103121
to_duckdb(&self.chunks()[0], chunk, cache)
104122
}

0 commit comments

Comments
 (0)