Skip to content

Commit 010b5e6

Browse files
authored
Add ensure_capacity to array builders and fix the case where list element builders didn't allocate enough capacity for all elements (#2698)
1 parent 11e2819 commit 010b5e6

File tree

12 files changed

+71
-15
lines changed

12 files changed

+71
-15
lines changed

encodings/fastlanes/src/bitpacking/compress.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ pub fn unpack_primitive<T: NativePType, UnsignedT: NativePType + BitPacking>(
213213
) -> VortexResult<PrimitiveArray> {
214214
let n = array.len();
215215
let mut builder = PrimitiveBuilder::with_capacity(array.dtype().nullability(), array.len());
216-
assert!(size_of::<T>() == size_of::<UnsignedT>());
216+
assert_eq!(size_of::<T>(), size_of::<UnsignedT>());
217217
unpack_into::<T, UnsignedT, _, _>(
218218
array,
219219
&mut builder,
@@ -224,7 +224,7 @@ pub fn unpack_primitive<T: NativePType, UnsignedT: NativePType + BitPacking>(
224224
// &mut [UnsignedT] is therefore safe.
225225
|x| unsafe { std::mem::transmute(x) },
226226
)?;
227-
assert!(builder.len() == n, "{} {}", builder.len(), n);
227+
assert_eq!(builder.len(), n);
228228
Ok(builder.finish_into_primitive())
229229
}
230230

encodings/fastlanes/src/bitpacking/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,10 +254,10 @@ impl ArrayCanonicalImpl for BitPackedArray {
254254
.as_any_mut()
255255
.downcast_mut()
256256
.vortex_expect("bit packed array must canonicalize into a primitive array"),
257-
// SAFETY: UnsignedT is the unsigned verison of T, reinterpreting &[UnsignedT] to
257+
// SAFETY: UnsignedT is the unsigned version of T, reinterpreting &[UnsignedT] to
258258
// &[T] is therefore safe.
259259
|x| unsafe { std::mem::transmute(x) },
260-
// SAFETY: UnsignedT is the unsigned verison of T, reinterpreting &mut [T] to
260+
// SAFETY: UnsignedT is the unsigned version of T, reinterpreting &mut [T] to
261261
// &mut [UnsignedT] is therefore safe.
262262
|x| unsafe { std::mem::transmute(x) },
263263
)

vortex-array/src/array/canonical.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ pub trait ArrayCanonicalImpl {
2121
/// - The length of the builder is incremented by the length of the input array.
2222
fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
2323
let canonical = self._to_canonical()?;
24-
// debug!("default impl append_to_builder {}", canonical.encoding());
2524
builder.extend_from_array(canonical.as_ref())
2625
}
2726
}

vortex-array/src/builders/bool.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ impl ArrayBuilder for BoolBuilder {
8686
Ok(())
8787
}
8888

89+
fn ensure_capacity(&mut self, capacity: usize) {
90+
if capacity > self.inner.capacity() {
91+
self.nulls.ensure_capacity(capacity);
92+
self.inner.reserve(capacity - self.inner.capacity());
93+
}
94+
}
95+
8996
fn set_validity(&mut self, validity: Mask) {
9097
self.nulls = LazyNullBufferBuilder::new(validity.len());
9198
self.nulls.append_validity_mask(validity);

vortex-array/src/builders/extension.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ impl ArrayBuilder for ExtensionBuilder {
8383
array.storage().append_to_builder(self.storage.as_mut())
8484
}
8585

86+
fn ensure_capacity(&mut self, capacity: usize) {
87+
self.storage.ensure_capacity(capacity)
88+
}
89+
8690
fn set_validity(&mut self, validity: Mask) {
8791
self.storage.set_validity(validity);
8892
}

vortex-array/src/builders/lazy_validity_builder.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,20 @@ impl LazyNullBufferBuilder {
135135
}
136136
}
137137

138+
pub fn ensure_capacity(&mut self, capacity: usize) {
139+
if self.inner.is_none() {
140+
self.capacity = capacity;
141+
} else {
142+
let inner = self
143+
.inner
144+
.as_mut()
145+
.vortex_expect("buffer just materialized");
146+
if capacity < inner.capacity() {
147+
inner.reserve(capacity - inner.len());
148+
}
149+
}
150+
}
151+
138152
#[inline]
139153
fn materialize_if_needed(&mut self) {
140154
if self.inner.is_none() {

vortex-array/src/builders/list.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::arrays::{ConstantArray, ListArray, OffsetPType};
1111
use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
1212
use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
1313
use crate::compute::{binary_numeric, slice, try_cast};
14-
use crate::{Array, ArrayRef};
14+
use crate::{Array, ArrayRef, ToCanonical};
1515

1616
pub struct ListBuilder<O: NativePType> {
1717
value_builder: Box<dyn ArrayBuilder>,
@@ -118,7 +118,7 @@ impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
118118
fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
119119
self.nulls.append_validity_mask(array.validity_mask()?);
120120

121-
let list = array.to_canonical()?.into_list()?;
121+
let list = array.to_list()?;
122122

123123
let cursor_usize = self.value_builder.len();
124124
let cursor = O::from_usize(cursor_usize).ok_or_else(|| {
@@ -141,16 +141,20 @@ impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
141141

142142
if !list.is_empty() {
143143
let last_used_index = self.index_builder.values().last().vortex_expect("there must be at least one index because we just extended a non-zero list of offsets");
144-
self.value_builder.extend_from_array(&slice(
145-
list.elements(),
146-
0,
147-
last_used_index.as_() - cursor_usize,
148-
)?)?;
144+
let sliced_values = slice(list.elements(), 0, last_used_index.as_() - cursor_usize)?;
145+
self.value_builder.ensure_capacity(sliced_values.len());
146+
self.value_builder.extend_from_array(&sliced_values)?;
149147
}
150148

151149
Ok(())
152150
}
153151

152+
fn ensure_capacity(&mut self, capacity: usize) {
153+
self.index_builder.ensure_capacity(capacity);
154+
self.value_builder.ensure_capacity(capacity);
155+
self.nulls.ensure_capacity(capacity);
156+
}
157+
154158
fn set_validity(&mut self, validity: Mask) {
155159
self.nulls = LazyNullBufferBuilder::new(validity.len());
156160
self.nulls.append_validity_mask(validity);

vortex-array/src/builders/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ pub trait ArrayBuilder: Send {
5858
/// Extends the array with the provided array, canonicalizing if necessary.
5959
fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()>;
6060

61+
/// Ensure that the builder can hold at least `capacity` number of items
62+
fn ensure_capacity(&mut self, capacity: usize);
63+
6164
/// Override builders validity with the one provided
6265
fn set_validity(&mut self, validity: Mask);
6366

vortex-array/src/builders/null.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ impl ArrayBuilder for NullBuilder {
5555
Ok(())
5656
}
5757

58+
fn ensure_capacity(&mut self, _capacity: usize) {}
59+
5860
fn set_validity(&mut self, validity: Mask) {
5961
self.length = validity.len();
6062
}

vortex-array/src/builders/primitive.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use crate::builders::ArrayBuilder;
1212
use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
1313
use crate::validity::Validity;
1414
use crate::variants::PrimitiveArrayTrait;
15-
use crate::{Array, ArrayRef};
15+
use crate::{Array, ArrayRef, ToCanonical};
1616

1717
/// Builder for [`PrimitiveArray`].
1818
pub struct PrimitiveBuilder<T> {
@@ -89,7 +89,8 @@ impl<T: NativePType> PrimitiveBuilder<T> {
8989
let offset = self.values.len();
9090
assert!(
9191
offset + len <= self.values.capacity(),
92-
"uninit_range of len {len} exceeds builder capacity"
92+
"uninit_range of len {len} exceeds builder capacity {}",
93+
self.values.capacity()
9394
);
9495

9596
UninitRange {
@@ -166,7 +167,7 @@ impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
166167
}
167168

168169
fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
169-
let array = array.to_canonical()?.into_primitive()?;
170+
let array = array.to_primitive()?;
170171
if array.ptype() != T::PTYPE {
171172
vortex_bail!("Cannot extend from array with different ptype");
172173
}
@@ -178,6 +179,13 @@ impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
178179
Ok(())
179180
}
180181

182+
fn ensure_capacity(&mut self, capacity: usize) {
183+
if capacity > self.values.capacity() {
184+
self.values.reserve(capacity - self.values.len());
185+
self.nulls.ensure_capacity(capacity);
186+
}
187+
}
188+
181189
fn set_validity(&mut self, validity: Mask) {
182190
self.nulls = LazyNullBufferBuilder::new(validity.len());
183191
self.nulls.append_validity_mask(validity);

0 commit comments

Comments
 (0)