Skip to content

Commit 66844f2

Browse files
authored
Chore: fix from_iter impls in vectors (#5075)
Didn't make it into the last one by accident --------- Signed-off-by: Connor Tsui <[email protected]>
1 parent e570ee4 commit 66844f2

File tree

7 files changed

+62
-39
lines changed

7 files changed

+62
-39
lines changed

vortex-buffer/src/bit/buf_mut.rs

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ impl BitBufferMut {
7272
}
7373
}
7474

75+
/// Consumes the buffer and return the underlying byte buffer.
76+
pub fn into_inner(self) -> ByteBufferMut {
77+
self.buffer
78+
}
79+
7580
/// Create a new mutable buffer with requested `len` and all bits set to `true`.
7681
pub fn new_set(len: usize) -> Self {
7782
Self {
@@ -205,20 +210,24 @@ impl BitBufferMut {
205210

206211
/// Set the bit at `index` to `true` without checking bounds.
207212
///
213+
/// Note: Do not call this in a tight loop. Prefer to use [`set_bit_unchecked`].
214+
///
208215
/// # Safety
209216
///
210217
/// The caller must ensure that `index` does not exceed the largest bit index in the backing buffer.
211-
pub unsafe fn set_unchecked(&mut self, index: usize) {
218+
unsafe fn set_unchecked(&mut self, index: usize) {
212219
// SAFETY: checked by caller
213220
unsafe { set_bit_unchecked(self.buffer.as_mut_ptr(), self.offset + index) }
214221
}
215222

216223
/// Unset the bit at `index` without checking bounds.
217224
///
225+
/// Note: Do not call this in a tight loop. Prefer to use [`unset_bit_unchecked`].
226+
///
218227
/// # Safety
219228
///
220229
/// The caller must ensure that `index` does not exceed the largest bit index in the backing buffer.
221-
pub unsafe fn unset_unchecked(&mut self, index: usize) {
230+
unsafe fn unset_unchecked(&mut self, index: usize) {
222231
// SAFETY: checked by caller
223232
unsafe { unset_bit_unchecked(self.buffer.as_mut_ptr(), self.offset + index) }
224233
}
@@ -229,6 +238,7 @@ impl BitBufferMut {
229238
///
230239
/// - `new_len` must be less than or equal to [`capacity()`](Self::capacity)
231240
/// - The elements at `old_len..new_len` must be initialized
241+
#[inline(always)]
232242
pub unsafe fn set_len(&mut self, new_len: usize) {
233243
debug_assert!(
234244
new_len <= self.capacity(),
@@ -470,6 +480,11 @@ impl BitBufferMut {
470480
pub fn as_mut_slice(&mut self) -> &mut [u8] {
471481
self.buffer.as_mut_slice()
472482
}
483+
484+
/// Returns a raw mutable pointer to the internal buffer.
485+
pub fn as_mut_ptr(&mut self) -> *mut u8 {
486+
self.buffer.as_mut_ptr()
487+
}
473488
}
474489

475490
impl Default for BitBufferMut {
@@ -511,14 +526,20 @@ impl FromIterator<bool> for BitBufferMut {
511526
fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
512527
let mut iter = iter.into_iter();
513528

514-
// Note that these hints might be incorrect.
515-
let (lower_bound, upper_bound_opt) = iter.size_hint();
516-
let capacity = upper_bound_opt.unwrap_or(lower_bound);
529+
// Since we do not know the length of the iterator, we can only guess how much memory we
530+
// need to reserve. Note that these hints may be inaccurate.
531+
let (lower_bound, _) = iter.size_hint();
517532

518-
let mut buf = BitBufferMut::new_unset(capacity);
533+
// We choose not to use the optional upper bound size hint to match the standard library.
534+
535+
// Initialize all bits to 0 with the given length. By doing this, we only need to set bits
536+
// that are true (and this is faster from benchmarks).
537+
let mut buf = BitBufferMut::new_unset(lower_bound);
538+
assert_eq!(buf.offset, 0);
519539

520540
// Directly write within our known capacity.
521-
for i in 0..capacity {
541+
let ptr = buf.buffer.as_mut_ptr();
542+
for i in 0..lower_bound {
522543
let Some(v) = iter.next() else {
523544
// SAFETY: We are definitely under the capacity and all values are already
524545
// initialized from `new_unset`.
@@ -528,7 +549,7 @@ impl FromIterator<bool> for BitBufferMut {
528549

529550
if v {
530551
// SAFETY: We have ensured that we are within the capacity.
531-
unsafe { buf.set_unchecked(i) }
552+
unsafe { set_bit_unchecked(ptr, i) }
532553
}
533554
}
534555

vortex-buffer/src/buffer.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,10 @@ impl<T> Buffer<T> {
193193
/// Create a buffer with values from the TrustedLen iterator.
194194
/// Should be preferred over `from_iter` when the iterator is known to be `TrustedLen`.
195195
pub fn from_trusted_len_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
196-
let (_, high) = iter.size_hint();
197-
let mut buffer =
198-
BufferMut::with_capacity(high.vortex_expect("TrustedLen iterator has no upper bound"));
196+
let (_, upper_bound) = iter.size_hint();
197+
let mut buffer = BufferMut::with_capacity(
198+
upper_bound.vortex_expect("TrustedLen iterator has no upper bound"),
199+
);
199200
buffer.extend_trusted(iter);
200201
buffer.freeze()
201202
}

vortex-buffer/src/buffer_mut.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -484,14 +484,11 @@ impl<T> BufferMut<T> {
484484
fn extend_iter(&mut self, mut iter: impl Iterator<Item = T>) {
485485
// Since we do not know the length of the iterator, we can only guess how much memory we
486486
// need to reserve. Note that these hints may be inaccurate.
487-
let (lower_bound, upper_bound_opt) = iter.size_hint();
488-
489-
// In the case that the upper bound is adversarial, we put a hard limit on the amount of
490-
// memory we reserve (and the OS should handle the rest with zero pages).
491-
let reserve_amount = upper_bound_opt
492-
.unwrap_or(lower_bound)
493-
.min(i32::MAX as usize);
494-
self.reserve(reserve_amount);
487+
let (lower_bound, _) = iter.size_hint();
488+
489+
// We choose not to use the optional upper bound size hint to match the standard library.
490+
491+
self.reserve(lower_bound);
495492

496493
let unwritten = self.capacity() - self.len();
497494

vortex-buffer/src/trusted_len.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,17 @@ pub trait TrustedLenExt: Iterator + Sized {
7272
///
7373
/// The caller must guarantee that the iterator does indeed have an exact length.
7474
unsafe fn trusted_len(self) -> TrustedLenAdapter<Self> {
75-
let (lower, maybe_upper) = self.size_hint();
76-
if let Some(upper) = maybe_upper {
75+
let (lower_bound, upper_bound_opt) = self.size_hint();
76+
if let Some(upper_bound) = upper_bound_opt {
7777
assert_eq!(
78-
lower, upper,
78+
lower_bound, upper_bound,
7979
"TrustedLenExt: iterator size hints must match if upper bound is given"
8080
);
8181
}
8282

8383
TrustedLenAdapter {
8484
inner: self,
85-
len: lower,
85+
len: lower_bound,
8686
#[cfg(debug_assertions)]
8787
count: 0,
8888
}

vortex-mask/src/lib.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use std::sync::{Arc, OnceLock};
2020

2121
use itertools::Itertools;
2222
pub use mask_mut::*;
23-
use vortex_buffer::{BitBuffer, BitBufferMut};
23+
use vortex_buffer::{BitBuffer, BitBufferMut, set_bit_unchecked};
2424
use vortex_error::{VortexResult, vortex_panic};
2525

2626
/// Represents a set of values that are all included, all excluded, or some mixture of both.
@@ -601,11 +601,13 @@ impl Mask {
601601
let existing_buffer = mask_values.bit_buffer();
602602

603603
let mut new_buffer_builder = BitBufferMut::new_unset(mask_values.len());
604+
debug_assert!(limit < mask_values.len());
604605

606+
let ptr = new_buffer_builder.as_mut_ptr();
605607
for index in existing_buffer.set_indices().take(limit) {
606-
unsafe {
607-
new_buffer_builder.set_unchecked(index);
608-
}
608+
// SAFETY: We checked that `limit` was less than the mask values length,
609+
// therefore `index` must be within the bounds of the bit buffer.
610+
unsafe { set_bit_unchecked(ptr, index) }
609611
}
610612

611613
Self::from(new_buffer_builder.freeze())

vortex-vector/src/bool/from_iter.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,28 +26,29 @@ impl FromIterator<Option<bool>> for BoolVectorMut {
2626
I: IntoIterator<Item = Option<bool>>,
2727
{
2828
let iter = iter.into_iter();
29+
// Since we do not know the length of the iterator, we can only guess how much memory we
30+
// need to reserve. Note that these hints may be inaccurate.
2931
let (lower_bound, _) = iter.size_hint();
3032

31-
let mut bits = Vec::with_capacity(lower_bound);
33+
// We choose not to use the optional upper bound size hint to match the standard library.
34+
35+
let mut bits = BitBufferMut::with_capacity(lower_bound);
3236
let mut validity = MaskMut::with_capacity(lower_bound);
3337

3438
for opt_val in iter {
3539
match opt_val {
3640
Some(val) => {
37-
bits.push(val);
41+
bits.append(val);
3842
validity.append_n(true, 1);
3943
}
4044
None => {
41-
bits.push(false); // Value doesn't matter for invalid entries.
45+
bits.append(false); // Value doesn't matter for invalid entries.
4246
validity.append_n(false, 1);
4347
}
4448
}
4549
}
4650

47-
BoolVectorMut {
48-
bits: BitBufferMut::from_iter(bits),
49-
validity,
50-
}
51+
BoolVectorMut { bits, validity }
5152
}
5253
}
5354

vortex-vector/src/primitive/from_iter.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,13 @@ impl<T: NativePType> FromIterator<Option<T>> for PVectorMut<T> {
2727
I: IntoIterator<Item = Option<T>>,
2828
{
2929
let iter = iter.into_iter();
30+
// Since we do not know the length of the iterator, we can only guess how much memory we
31+
// need to reserve. Note that these hints may be inaccurate.
3032
let (lower_bound, _) = iter.size_hint();
3133

32-
let mut elements = Vec::with_capacity(lower_bound);
34+
// We choose not to use the optional upper bound size hint to match the standard library.
35+
36+
let mut elements = BufferMut::with_capacity(lower_bound);
3337
let mut validity = MaskMut::with_capacity(lower_bound);
3438

3539
for opt_val in iter {
@@ -45,10 +49,7 @@ impl<T: NativePType> FromIterator<Option<T>> for PVectorMut<T> {
4549
}
4650
}
4751

48-
PVectorMut {
49-
elements: BufferMut::from_iter(elements),
50-
validity,
51-
}
52+
PVectorMut { elements, validity }
5253
}
5354
}
5455

0 commit comments

Comments
 (0)