|
10 | 10 |
|
11 | 11 | use std::sync::Arc; |
12 | 12 |
|
13 | | -use vortex_dtype::{DType, IntegerPType, Nullability}; |
| 13 | +use vortex_dtype::{DType, IntegerPType, Nullability, match_each_integer_ptype}; |
14 | 14 | use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic}; |
15 | 15 | use vortex_mask::Mask; |
16 | 16 | use vortex_scalar::{ListScalar, Scalar}; |
17 | 17 |
|
18 | 18 | use crate::array::{Array, ArrayRef, IntoArray}; |
19 | | -use crate::arrays::{ListViewArray, ListViewShape}; |
| 19 | +use crate::arrays::{ListViewArray, ListViewRebuildMode, ListViewShape, PrimitiveArray}; |
20 | 20 | use crate::builders::lazy_null_builder::LazyBitBufferBuilder; |
21 | 21 | use crate::builders::{ |
22 | | - ArrayBuilder, DEFAULT_BUILDER_CAPACITY, PrimitiveBuilder, builder_with_capacity, |
| 22 | + ArrayBuilder, DEFAULT_BUILDER_CAPACITY, PrimitiveBuilder, UninitRange, builder_with_capacity, |
23 | 23 | }; |
24 | | -use crate::{Canonical, ToCanonical}; |
| 24 | +use crate::{Canonical, ToCanonical, compute}; |
25 | 25 |
|
26 | 26 | /// A builder for creating [`ListViewArray`] instances, parameterized by the [`IntegerPType`] of |
27 | 27 | /// the `offsets` and the `sizes` builders. |
@@ -236,28 +236,74 @@ impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> { |
236 | 236 | } |
237 | 237 |
|
238 | 238 | unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) { |
239 | | - let listview_array = array.to_listview(); |
240 | | - if listview_array.is_empty() { |
| 239 | + let listview = array.to_listview(); |
| 240 | + if listview.is_empty() { |
241 | 241 | return; |
242 | 242 | } |
243 | 243 |
|
244 | | - // TODO NOW: Rebuild the array we are extending by to be zero-copy to list and then be |
245 | | - // smarter about this. |
246 | | - |
247 | | - // TODO(connor)[ListView]: We could potentially concatenate the new elements on top of the |
248 | | - // existing elements and recalculate offsets (and then use `UninitRange`). However, that |
249 | | - // would mean we lose the guarantee that the output `ListViewArray` does not look like a |
250 | | - // `ListArray` (because the incoming array could have garbage data). |
251 | | - |
252 | | - // We assume the worst case scenario, where the list view array is stored completely out of |
253 | | - // order, with many out-of-order offsets, and lots of garbage data. Thus, we simply iterate |
254 | | - // over all of the lists in the array and copy the data into this builder. |
255 | | - for i in 0..listview_array.len() { |
256 | | - let list = listview_array.scalar_at(i); |
257 | | - |
258 | | - self.append_scalar(&list) |
259 | | - .vortex_expect("was unable to extend the `ListViewBuilder`") |
| 244 | + // If we do not have the guarantee that the array is zero-copyable to a list, then we have |
| 245 | + // to manually append each scalar. |
| 246 | + if !listview.shape().is_zero_copy_to_list() { |
| 247 | + for i in 0..listview.len() { |
| 248 | + let list = listview.scalar_at(i); |
| 249 | + |
| 250 | + self.append_scalar(&list) |
| 251 | + .vortex_expect("was unable to extend the `ListViewBuilder`") |
| 252 | + } |
260 | 253 | } |
| 254 | + |
| 255 | + // Otherwise, after removing any leading and trailing elements, we can simply bulk append |
| 256 | + // the entire array. |
| 257 | + let listview = listview.rebuild(ListViewRebuildMode::TrimElements); |
| 258 | + |
| 259 | + self.nulls.append_validity_mask(array.validity_mask()); |
| 260 | + self.elements_builder.extend_from_array(listview.elements()); |
| 261 | + |
| 262 | + self.sizes_builder.extend_from_array( |
| 263 | + compute::cast(listview.sizes(), self.sizes_builder.dtype()) |
| 264 | + .vortex_expect( |
| 265 | + "was somehow unable to cast the new sizes to the type of the builder sizes", |
| 266 | + ) |
| 267 | + .as_ref(), |
| 268 | + ); |
| 269 | + |
| 270 | + // Adjust all of the offsets. |
| 271 | + let new_offsets = listview.offsets().to_primitive(); // This should be cheap. |
| 272 | + let num_new_offsets = new_offsets.len(); |
| 273 | + |
| 274 | + let curr_builder_len = self.len(); |
| 275 | + self.offsets_builder.reserve_exact(num_new_offsets); |
| 276 | + |
| 277 | + let uninit_range = self.offsets_builder.uninit_range(num_new_offsets); |
| 278 | + |
| 279 | + fn adjust_offsets<'a, O: IntegerPType, A: IntegerPType>( |
| 280 | + mut uninit_range: UninitRange<'a, O>, |
| 281 | + new_offsets: PrimitiveArray, |
| 282 | + curr_builder_len: usize, |
| 283 | + ) { |
| 284 | + let new_offsets_slice = new_offsets.as_slice::<A>(); |
| 285 | + let curr_builder_len = O::from_usize(curr_builder_len) |
| 286 | + .vortex_expect("the builder length did not fit into the offset type (impossible)"); |
| 287 | + |
| 288 | + for i in 0..uninit_range.len() { |
| 289 | + let new_offset = new_offsets_slice[i]; |
| 290 | + let new_offset_correct_type = O::from_usize( |
| 291 | + new_offset |
| 292 | + .to_usize() |
| 293 | + .vortex_expect("Offsets must always fit in usize"), |
| 294 | + ) |
| 295 | + .vortex_expect("New offset somehow did not fit into the builder's offset type"); |
| 296 | + |
| 297 | + uninit_range.set_value(i, new_offset_correct_type + curr_builder_len); |
| 298 | + } |
| 299 | + // SAFETY: We have set all the values in the range, and since `offsets` are |
| 300 | + // non-nullable, we are done. |
| 301 | + unsafe { uninit_range.finish() }; |
| 302 | + } |
| 303 | + |
| 304 | + match_each_integer_ptype!(new_offsets.ptype(), |A| { |
| 305 | + adjust_offsets::<O, A>(uninit_range, new_offsets, curr_builder_len); |
| 306 | + }) |
261 | 307 | } |
262 | 308 |
|
263 | 309 | fn reserve_exact(&mut self, capacity: usize) { |
@@ -481,19 +527,15 @@ mod tests { |
481 | 527 | .unwrap(); |
482 | 528 |
|
483 | 529 | // Extend from the ListArray. |
484 | | - unsafe { |
485 | | - builder.extend_from_array_unchecked(&source.into_array()); |
486 | | - } |
| 530 | + builder.extend_from_array(&source.into_array()); |
487 | 531 |
|
488 | 532 | // Extend from empty array (should be no-op). |
489 | 533 | let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>( |
490 | 534 | std::iter::empty::<Option<Vec<i32>>>(), |
491 | 535 | Arc::new(I32.into()), |
492 | 536 | ) |
493 | 537 | .unwrap(); |
494 | | - unsafe { |
495 | | - builder.extend_from_array_unchecked(&empty_source.into_array()); |
496 | | - } |
| 538 | + builder.extend_from_array(&empty_source.into_array()); |
497 | 539 |
|
498 | 540 | let listview = builder.finish_into_listview(); |
499 | 541 | assert_eq!(listview.len(), 4); |
|
0 commit comments