Skip to content

Commit ae37e36

Browse files
committed
implement batch operations for list vectors
Signed-off-by: Connor Tsui <[email protected]>
1 parent 2818b02 commit ae37e36

File tree

2 files changed

+110
-11
lines changed

2 files changed

+110
-11
lines changed

vortex-vector/src/listview/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
//! within the flat elements array. This allows for efficient access to individual lists without
1111
//! copying data. This is similar to Apache Arrow's `ListView` type.
1212
13+
// TODO(connor): More docs and examples.
14+
1315
mod vector;
1416
pub use vector::ListViewVector;
1517

vortex-vector/src/listview/vector_mut.rs

Lines changed: 108 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ use vortex_mask::MaskMut;
1111

1212
use super::ListViewVector;
1313
use crate::ops::VectorMutOps;
14-
use crate::primitive::PrimitiveVectorMut;
15-
use crate::{VectorMut, match_each_integer_pvector_mut};
14+
use crate::primitive::{PrimitiveVector, PrimitiveVectorMut};
15+
use crate::{VectorMut, VectorOps, match_each_integer_pvector, match_each_integer_pvector_mut};
1616

1717
/// A mutable vector of variable-width lists.
1818
///
@@ -131,7 +131,7 @@ impl ListViewVectorMut {
131131
);
132132

133133
// Check that each `offsets[i] + sizes[i] <= elements.len()`.
134-
validate_views_bound(elements.len(), &offsets, &sizes)?;
134+
validate_views_bound(elements.len() as u64, &offsets, &sizes)?;
135135

136136
Ok(Self {
137137
elements,
@@ -237,12 +237,72 @@ impl VectorMutOps for ListViewVectorMut {
237237
self.validity.reserve(additional);
238238
}
239239

240-
fn extend_from_vector(&mut self, _other: &ListViewVector) {
241-
todo!()
240+
/// This will also panic if we try to extend the `ListViewVector` beyond the maximum offset
241+
/// representable by the type of the `offsets` primitive vector.
242+
fn extend_from_vector(&mut self, other: &ListViewVector) {
243+
// Extend the elements with the other's elements.
244+
let old_elements_len = self.elements.len() as u64;
245+
self.elements.extend_from_vector(&other.elements);
246+
let new_elements_len = self.elements.len() as u64;
247+
248+
// Then extend the sizes with the other's sizes (these do not need any adjustment).
249+
self.sizes.extend_from_vector(&other.sizes);
250+
251+
// We need this assertion to ensure that the casts below are infallible.
252+
assert!(
253+
new_elements_len < self.offsets.ptype().max_value_as_u64(),
254+
"the elements length {new_elements_len} is not representable by the offsets type {}",
255+
self.offsets.ptype()
256+
);
257+
258+
// Finally, extend the offsets after adding the old `elements` length to each.
259+
adjust_and_extend_offsets(&mut self.offsets, &other.offsets, old_elements_len);
260+
261+
self.validity.append_mask(&other.validity);
262+
self.len += other.len;
263+
debug_assert_eq!(self.len, self.validity.len());
242264
}
243265

244-
fn append_nulls(&mut self, _n: usize) {
245-
todo!("Need to figure out what the 'value' of nulls are for list view vectors")
266+
fn append_nulls(&mut self, n: usize) {
267+
// To support easier copying to Arrow `List`s, we point the null views at the end of
268+
// the `elements` vector (with size 0) to hopefully keep offsets sorted if they were already
269+
// sorted.
270+
let elements_len = self.elements.len();
271+
272+
debug_assert!(
273+
(elements_len as u64) < self.offsets.ptype().max_value_as_u64(),
274+
"the elements length {elements_len} is somehow not representable by the offsets type {}",
275+
self.offsets.ptype()
276+
);
277+
278+
self.offsets.reserve(n);
279+
self.sizes.reserve(n);
280+
281+
match_each_integer_pvector_mut!(&mut self.offsets, |offsets_vec| {
282+
for _ in 0..n {
283+
// SAFETY: We just reserved capacity for `n` elements above, and the cast must
284+
// succeed because the elements length must be representable by the offset type.
285+
#[allow(clippy::cast_possible_truncation)]
286+
unsafe {
287+
offsets_vec.push_unchecked(elements_len as _)
288+
};
289+
}
290+
});
291+
292+
match_each_integer_pvector_mut!(&mut self.sizes, |sizes_vec| {
293+
for _ in 0..n {
294+
// SAFETY: We just reserved capacity for `n` elements above, and `0` is
295+
// representable by all integer types.
296+
#[allow(clippy::cast_possible_truncation)]
297+
unsafe {
298+
sizes_vec.push_unchecked(0 as _)
299+
};
300+
}
301+
});
302+
303+
self.validity.append_n(false, n);
304+
self.len += n;
305+
debug_assert_eq!(self.len, self.validity.len());
246306
}
247307

248308
fn freeze(self) -> ListViewVector {
@@ -267,9 +327,9 @@ impl VectorMutOps for ListViewVectorMut {
267327
// TODO(connor): It would be better to separate everything inside the macros into its own function,
268328
// but that would require adding another macro that sets a type `$type` to be used by the caller.
269329
/// Checks that every view is in bounds, i.e. that `offsets[i] + sizes[i] <= elements_len`.
270-
#[allow(clippy::cognitive_complexity, clippy::cast_possible_truncation)]
330+
#[allow(clippy::cognitive_complexity)]
271331
fn validate_views_bound(
272-
elements_len: usize,
332+
elements_len: u64,
273333
offsets: &PrimitiveVectorMut,
274334
sizes: &PrimitiveVectorMut,
275335
) -> VortexResult<()> {
@@ -280,13 +340,50 @@ fn validate_views_bound(
280340
let offsets_slice = offsets_vector.as_ref();
281341
let sizes_slice = sizes_vector.as_ref();
282342

343+
#[allow(clippy::unnecessary_cast)]
283344
for i in 0..len {
284-
let offset = offsets_slice[i] as usize;
285-
let size = sizes_slice[i] as usize;
345+
let offset = offsets_slice[i] as u64;
346+
let size = sizes_slice[i] as u64;
286347
vortex_ensure!(offset + size <= elements_len);
287348
}
288349
});
289350
});
290351

291352
Ok(())
292353
}
354+
355+
// TODO(connor): It would be better to separate everything inside the macros into its own function,
356+
// but that would require adding another macro that sets a type `$type` to be used by the caller.
357+
/// Appends every offset from `other` to `our_offsets`, adding `old_elements_len` to each one.
358+
#[allow(clippy::cognitive_complexity)]
359+
fn adjust_and_extend_offsets(
360+
our_offsets: &mut PrimitiveVectorMut,
361+
other: &PrimitiveVector,
362+
old_elements_len: u64,
363+
) {
364+
our_offsets.reserve(other.len());
365+
366+
// Adjust each offset from `other` by adding the current elements length to each of the
367+
// incoming offsets.
368+
match_each_integer_pvector_mut!(our_offsets, |self_offsets| {
369+
match_each_integer_pvector!(other, |other_offsets| {
370+
let other_offsets_slice = other_offsets.as_ref();
371+
372+
// Append each offset from `other`, adjusted by `old_elements_len`.
373+
for i in 0..other.len() {
374+
// All offset types are representable via a `u64` since we also ensure offsets
375+
// are always non-negative.
376+
#[allow(clippy::unnecessary_cast)]
377+
let adjusted_offset = other_offsets_slice[i] as u64 + old_elements_len;
378+
379+
// SAFETY: We just reserved capacity for `other.len()` elements above, and we
380+
// also know the cast is fine because the caller (`extend_from_vector`) asserts that the maximum
381+
// possible offset is representable by the offset type.
382+
#[allow(clippy::cast_possible_truncation)]
383+
unsafe {
384+
self_offsets.push_unchecked(adjusted_offset as _);
385+
}
386+
}
387+
});
388+
});
389+
}

0 commit comments

Comments
 (0)