Skip to content

Commit 0415270

Browse files
joseph-isaacs and claude committed
fix: use into_arrow with specific DataType for VarBin conversion
Change VarBinScheme to use into_arrow() with an explicit Arrow DataType (Utf8 or Binary) instead of into_arrow_preferred(), which returns StringView/BinaryView arrays.

This ensures proper conversion:
- VarBinViewArray -> Arrow GenericByteArray (Utf8/Binary)
- Arrow GenericByteArray -> VarBinArray

Also adds the arrow-schema dependency to vortex-btrblocks.

Signed-off-by: Joe Isaacs <[email protected]>
Co-Authored-By: Claude <[email protected]>
1 parent 3ddbe35 commit 0415270

File tree

3 files changed

+11
-2
lines changed

3 files changed

+11
-2
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-btrblocks/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ rust-version = { workspace = true }
1414
version = { workspace = true }
1515

1616
[dependencies]
17+
arrow-schema = { workspace = true }
1718
getrandom_v03 = { workspace = true }
1819
itertools = { workspace = true }
1920
log = { workspace = true }

vortex-btrblocks/src/string.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use vortex_array::arrays::{
77
use vortex_array::builders::dict::dict_encode;
88
use vortex_array::vtable::ValidityHelper;
99
use vortex_array::{ArrayRef, IntoArray, ToCanonical};
10+
use vortex_dtype::DType;
1011
use vortex_error::{VortexExpect, VortexResult};
1112
use vortex_fsst::{FSSTArray, fsst_compress, fsst_train_compressor};
1213
use vortex_scalar::Scalar;
@@ -205,8 +206,14 @@ impl Scheme for VarBinScheme {
205206
) -> VortexResult<ArrayRef> {
206207
use vortex_array::arrow::{FromArrowArray, IntoArrowArray};
207208

208-
// Convert VarBinView -> Arrow -> VarBin using the canonical Arrow conversion path
209-
let arrow_array = stats.source().to_array().into_arrow_preferred()?;
209+
let arrow_dtype = match stats.src.dtype() {
210+
DType::Utf8(..) => arrow_schema::DataType::Utf8,
211+
DType::Binary(..) => arrow_schema::DataType::Binary,
212+
_ => unreachable!("VarBinView must be Utf8 or Binary"),
213+
};
214+
215+
// Convert VarBinView -> Arrow VarBin -> Vortex VarBin
216+
let arrow_array = stats.source().to_array().into_arrow(&arrow_dtype)?;
210217
let nullable = stats.source().dtype().is_nullable();
211218

212219
Ok(ArrayRef::from_arrow(arrow_array.as_ref(), nullable))

0 commit comments

Comments
 (0)