Skip to content

Commit e586ab6

Browse files
committed
address comments
1 parent 8f5772d commit e586ab6

File tree

1 file changed

+52
-35
lines changed

1 file changed

+52
-35
lines changed

encodings/sparse/src/canonical.rs

Lines changed: 52 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use vortex_array::patches::Patches;
1212
use vortex_array::validity::Validity;
1313
use vortex_array::vtable::CanonicalVTable;
1414
use vortex_array::{Array, Canonical, ToCanonical as _};
15-
use vortex_buffer::{Buffer, BufferMut, buffer};
15+
use vortex_buffer::{Buffer, buffer, buffer_mut};
1616
use vortex_dtype::{
1717
DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
1818
match_each_native_ptype,
@@ -235,26 +235,18 @@ fn canonicalize_utf8<I: NativePType>(
235235
)
236236
} else {
237237
// any <=12 character value will do
238-
BinaryView::make_view("FILL_VALUE".as_ref(), 0, 0)
238+
BinaryView::make_view(&[], 0, 0)
239239
};
240240

241-
let mut view_builder = BufferMut::<BinaryView>::with_capacity(len);
242-
let mut i = 0usize;
241+
let mut views = buffer_mut![fill; len];
243242
for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
244243
let patch_index_usize = <usize as NumCast>::from(patch_index)
245244
.vortex_expect("var bin view indices must fit in usize");
246-
for _ in i..patch_index_usize {
247-
view_builder.push(fill)
248-
}
249-
view_builder.push(patch);
250-
i = patch_index_usize + 1;
251-
}
252-
for _ in i..len {
253-
view_builder.push(fill)
245+
views[patch_index_usize] = patch;
254246
}
255247

256248
let array = VarBinViewArray::try_new(
257-
view_builder.freeze(),
249+
views.freeze(),
258250
buffers,
259251
DType::Utf8(validity.nullability()),
260252
validity,
@@ -265,10 +257,10 @@ fn canonicalize_utf8<I: NativePType>(
265257

266258
#[cfg(test)]
267259
mod test {
268-
use itertools::Itertools as _;
269260
use rstest::rstest;
270261
use vortex_array::arrays::{
271-
BoolArray, BooleanBufferBuilder, DecimalArray, PrimitiveArray, StructArray, VarBinViewArray,
262+
BoolArray, BooleanBufferBuilder, DecimalArray, PrimitiveArray, StructArray, VarBinArray,
263+
VarBinViewArray,
272264
};
273265
use vortex_array::arrow::IntoArrowArray as _;
274266
use vortex_array::validity::Validity;
@@ -634,26 +626,6 @@ mod test {
634626
])
635627
.into_array();
636628

637-
println!(
638-
"strings: {}",
639-
(0..strings.len())
640-
.map(|i| strings.scalar_at(i).unwrap())
641-
.join(",")
642-
);
643-
let va = strings.validity_mask().unwrap().into_array();
644-
println!(
645-
"strings_validity: {}",
646-
(0..va.len())
647-
.map(|i| va
648-
.scalar_at(i)
649-
.unwrap()
650-
.as_bool()
651-
.value()
652-
.map_or("N", |b| if b { "T" } else { "F" })
653-
.to_owned())
654-
.join(",")
655-
);
656-
657629
let array = SparseArray::try_new(
658630
buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
659631
strings,
@@ -720,4 +692,49 @@ mod test {
720692
assert_eq!(actual.data_type(), expected.data_type());
721693
assert_eq!(&actual, &expected);
722694
}
695+
696+
#[test]
697+
fn test_sparse_varbin_null_fill() {
698+
let strings = <VarBinArray as FromIterator<_>>::from_iter([
699+
Some("hello"),
700+
Some("goodbye"),
701+
Some("hello"),
702+
None,
703+
Some("bonjour"),
704+
Some("你好"),
705+
None,
706+
])
707+
.into_array();
708+
709+
let array = SparseArray::try_new(
710+
buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
711+
strings,
712+
12,
713+
Scalar::null(DType::Utf8(Nullable)),
714+
)
715+
.unwrap();
716+
717+
let actual = array.to_varbinview().unwrap().into_array();
718+
let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
719+
Some("hello"),
720+
None,
721+
None,
722+
Some("goodbye"),
723+
Some("hello"),
724+
None,
725+
None,
726+
Some("bonjour"),
727+
None,
728+
Some("你好"),
729+
None,
730+
None,
731+
])
732+
.into_array();
733+
734+
let actual = actual.into_arrow_preferred().unwrap();
735+
let expected = expected.into_arrow_preferred().unwrap();
736+
737+
assert_eq!(actual.data_type(), expected.data_type());
738+
assert_eq!(&actual, &expected);
739+
}
723740
}

0 commit comments

Comments
 (0)