|
5 | 5 |
|
6 | 6 | use std::sync::Arc; |
7 | 7 |
|
| 8 | +use num_traits::NumCast; |
8 | 9 | use vortex_dtype::DType; |
9 | 10 | use vortex_dtype::PType; |
10 | 11 | use vortex_error::VortexExpect; |
@@ -185,11 +186,16 @@ impl ListViewVectorMut { |
185 | 186 |
|
186 | 187 | /// Creates a new [`ListViewVectorMut`] with the specified capacity. |
187 | 188 | pub fn with_capacity(element_dtype: &DType, capacity: usize) -> Self { |
| 189 | + let offsets_ptype = PType::min_unsigned_ptype_for_value(capacity as u64); |
| 190 | + let sizes_ptype = offsets_ptype; |
| 191 | + |
| 192 | + // SAFETY: Everything is empty and the offsets and sizes `PType` is the same, so all |
| 193 | + // invariants are satisfied. |
188 | 194 | unsafe { |
189 | 195 | Self::new_unchecked( |
190 | 196 | Box::new(VectorMut::with_capacity(element_dtype, 0)), |
191 | | - PrimitiveVectorMut::with_capacity(PType::U64, capacity), |
192 | | - PrimitiveVectorMut::with_capacity(PType::U32, capacity), |
| 197 | + PrimitiveVectorMut::with_capacity(offsets_ptype, capacity), |
| 198 | + PrimitiveVectorMut::with_capacity(sizes_ptype, capacity), |
193 | 199 | MaskMut::with_capacity(capacity), |
194 | 200 | ) |
195 | 201 | } |
@@ -300,27 +306,23 @@ impl VectorMutOps for ListViewVectorMut { |
300 | 306 | self.len = self.validity.len(); |
301 | 307 | } |
302 | 308 |
|
303 | | - /// This will also panic if we try to extend the `ListViewVector` beyond the maximum offset |
304 | | - /// representable by the type of the `offsets` primitive vector. |
305 | 309 | fn extend_from_vector(&mut self, other: &ListViewVector) { |
306 | 310 | // Extend the elements with the other's elements. |
307 | 311 | let old_elements_len = self.elements.len() as u64; |
308 | 312 | self.elements.extend_from_vector(&other.elements); |
309 | 313 | let new_elements_len = self.elements.len() as u64; |
310 | 314 |
|
311 | | - // Then extend the sizes with the other's sizes (these do not need any adjustment). |
312 | | - self.sizes.extend_from_vector(&other.sizes); |
| 315 | + // Extend sizes with automatic upcasting (does not panic on type mismatch). |
| 316 | + self.sizes.extend_from_vector_with_upcast(&other.sizes); |
313 | 317 |
|
314 | | - // We need this assertion to ensure that the casts below are infallible. |
315 | | - assert!( |
316 | | - new_elements_len < self.offsets.ptype().max_value_as_u64(), |
317 | | - "the elements length {new_elements_len} is not representable by the offsets type {}", |
318 | | - self.offsets.ptype() |
| 318 | + // Extend offsets with adjustment and automatic upcasting based on `new_elements_len`. |
| 319 | + adjust_and_extend_offsets( |
| 320 | + &mut self.offsets, |
| 321 | + &other.offsets, |
| 322 | + old_elements_len, |
| 323 | + new_elements_len, |
319 | 324 | ); |
320 | 325 |
|
321 | | - // Finally, extend the offsets after adding the old `elements` length to each. |
322 | | - adjust_and_extend_offsets(&mut self.offsets, &other.offsets, old_elements_len); |
323 | | - |
324 | 326 | self.validity.append_mask(&other.validity); |
325 | 327 | self.len += other.len; |
326 | 328 | debug_assert_eq!(self.len, self.validity.len()); |
@@ -505,39 +507,60 @@ fn validate_views_bound( |
505 | 507 | Ok(()) |
506 | 508 | } |
507 | 509 |
|
508 | | -// TODO(connor): It would be better to separate everything inside the macros into its own function, |
509 | | -// but that would require adding another macro that sets a type `$type` to be used by the caller. |
510 | | -/// Checks that all views are `<= elements_len`. |
| 510 | +/// Adjusts and extends offsets from `new_offsets` into `curr_offsets`, upcasting if needed. |
| 511 | +/// |
| 512 | +/// Each offset from `new_offsets` is adjusted by adding `old_elements_len` before appending. |
| 513 | +/// |
| 514 | +/// If the resulting offsets would exceed the current offset type's capacity, the offset vector is |
| 515 | +/// automatically upcasted to a wider type. |
511 | 516 | #[expect( |
512 | 517 | clippy::cognitive_complexity, |
513 | 518 | reason = "complexity from nested match_each_* macros" |
514 | 519 | )] |
515 | 520 | fn adjust_and_extend_offsets( |
516 | | - our_offsets: &mut PrimitiveVectorMut, |
517 | | - other: &PrimitiveVector, |
| 521 | + curr_offsets: &mut PrimitiveVectorMut, |
| 522 | + new_offsets: &PrimitiveVector, |
518 | 523 | old_elements_len: u64, |
| 524 | + new_elements_len: u64, |
519 | 525 | ) { |
520 | | - our_offsets.reserve(other.len()); |
| 526 | + // Make sure we use the correct width to fit all offsets. |
| 527 | + let target_ptype = PType::min_unsigned_ptype_for_value(new_elements_len) |
| 528 | + .max_unsigned_ptype(curr_offsets.ptype()) |
| 529 | + .max_unsigned_ptype(new_offsets.ptype()); |
| 530 | + |
| 531 | + if curr_offsets.ptype() != target_ptype { |
| 532 | + let old_offsets = std::mem::replace( |
| 533 | + curr_offsets, |
| 534 | + PrimitiveVectorMut::with_capacity(target_ptype, 0), |
| 535 | + ); |
| 536 | + *curr_offsets = old_offsets.upcast(target_ptype); |
| 537 | + } |
521 | 538 |
|
522 | | - // Adjust each offset from `other` by adding the current elements length to each of the |
| 539 | + curr_offsets.reserve(new_offsets.len()); |
| 540 | + |
| 541 | + // Adjust each offset from `new_offsets` by adding the current elements length to each of the |
523 | 542 | // incoming offsets. |
524 | | - match_each_integer_pvector_mut!(our_offsets, |self_offsets| { |
525 | | - match_each_integer_pvector!(other, |other_offsets| { |
526 | | - let other_offsets_slice = other_offsets.as_ref(); |
527 | | - |
528 | | - // Append each offset from `other`, adjusted by the elements_offset. |
529 | | - for i in 0..other.len() { |
530 | | - // All offset types are representable via a `u64` since we also ensure offsets |
531 | | - // are always non-negative. |
| 543 | + match_each_integer_pvector_mut!(curr_offsets, |curr| { |
| 544 | + match_each_integer_pvector!(new_offsets, |new| { |
| 545 | + let new_offsets_slice = new.as_ref(); |
| 546 | + |
| 547 | + // Append each offset from `new_offsets`, adjusted by the elements_offset. |
| 548 | + for i in 0..new.len() { |
| 549 | + // All offset types are representable via a `u64` since we also ensure offsets are |
| 550 | + // always non-negative. |
532 | 551 | #[allow(clippy::unnecessary_cast)] |
533 | | - let adjusted_offset = other_offsets_slice[i] as u64 + old_elements_len; |
| 552 | + let adjusted_offset = new_offsets_slice[i] as u64 + old_elements_len; |
| 553 | + debug_assert!( |
| 554 | + adjusted_offset < new_elements_len, |
| 555 | + "new list view offset is somehow out of bounds, something has gone wrong" |
| 556 | + ); |
534 | 557 |
|
535 | | - // SAFETY: We just reserved capacity for `other.len()` elements above, and we |
536 | | - // also know the cast is fine because we verified above that the maximum |
537 | | - // possible offset is representable by the offset type. |
538 | | - #[allow(clippy::cast_possible_truncation)] |
| 558 | + let converted = NumCast::from(adjusted_offset) |
| 559 | + .vortex_expect("offset conversion should succeed after upcast"); |
| 560 | + |
| 561 | + // SAFETY: We reserved capacity for `new_offsets.len()` elements above. |
539 | 562 | unsafe { |
540 | | - self_offsets.push_unchecked(adjusted_offset as _); |
| 563 | + curr.push_unchecked(converted); |
541 | 564 | } |
542 | 565 | } |
543 | 566 | }); |
|
0 commit comments