Skip to content

Commit 684ed94

Browse files
authored
Chore: Add more docs + from_option_iter (#5031)
Tracking Issue: #5028 Adds documentation examples to everything. Also removes some of the macros that were only specific to a single method that are probably not super useful for users. There is still an issue where `try_into_mut` on `Mask` doesn't really behave as it should (because we need to still change out arrow's `BooleanBuffer` for our `BitBuffer` inside `MaskValues`, but I have TODOs on those doctests. --------- Signed-off-by: Connor Tsui <[email protected]>
1 parent 6b04887 commit 684ed94

File tree

16 files changed

+845
-266
lines changed

16 files changed

+845
-266
lines changed

vortex-buffer/src/bit/buf_mut.rs

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
// TODO(connor): The API of `BitBufferMut` should probably share more methods with `BitBuffer`.
5-
64
use arrow_buffer::bit_chunk_iterator::BitChunks;
75
use bitvec::view::BitView;
86

@@ -150,12 +148,11 @@ impl BitBufferMut {
150148

151149
/// Reserve additional bit capacity for the buffer.
152150
pub fn reserve(&mut self, additional: usize) {
153-
let required_capacity = (self.offset + self.len + additional).div_ceil(8);
154-
let buffer_capacity = self.buffer.capacity();
155-
if required_capacity > self.buffer.capacity() {
156-
let additional = required_capacity - buffer_capacity;
157-
self.buffer.reserve(additional);
158-
}
151+
let required_bits = self.offset + self.len + additional;
152+
let required_bytes = required_bits.div_ceil(8); // Rounds up.
153+
154+
let additional_bytes = required_bytes.saturating_sub(self.buffer.len());
155+
self.buffer.reserve(additional_bytes);
159156
}
160157

161158
/// Set the bit at `index` to the given boolean value.
@@ -543,6 +540,28 @@ mod tests {
543540
assert!(bools.value(9));
544541
}
545542

543+
#[test]
544+
fn test_reserve_ensures_len_plus_additional() {
545+
// This test documents the fix for the bug where reserve was incorrectly
546+
// calculating additional bytes from capacity instead of len.
547+
548+
let mut bits = BitBufferMut::with_capacity(10);
549+
assert_eq!(bits.len(), 0);
550+
551+
bits.reserve(100);
552+
553+
// Should have capacity for at least len + 100 = 0 + 100 = 100 bits.
554+
assert!(bits.capacity() >= 100);
555+
556+
bits.append_n(true, 50);
557+
assert_eq!(bits.len(), 50);
558+
559+
bits.reserve(100);
560+
561+
// Should have capacity for at least len + 100 = 50 + 100 = 150 bits.
562+
assert!(bits.capacity() >= 150);
563+
}
564+
546565
#[test]
547566
fn test_with_offset_zero() {
548567
// Test basic operations when offset is 0

vortex-mask/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,11 @@ impl MaskValues {
203203
MaskIter::Indices(self.indices())
204204
}
205205
}
206+
207+
/// Extracts the internal [`BitBuffer`].
208+
pub(crate) fn into_buffer(self) -> BitBuffer {
209+
self.buffer
210+
}
206211
}
207212

208213
impl Mask {

vortex-mask/src/mask_mut.rs

Lines changed: 69 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::ops::Sub;
5+
use std::sync::Arc;
56

67
use vortex_buffer::BitBufferMut;
78

@@ -225,9 +226,13 @@ impl Mask {
225226
Mask::AllTrue(len) => Ok(MaskMut::new_true(len)),
226227
Mask::AllFalse(len) => Ok(MaskMut::new_false(len)),
227228
Mask::Values(values) => {
228-
// FIXME(ngates): we can never convert Arrow BooleanBuffer to ByteBufferMut,
229-
// so we have to wait until we use BitBuffer internally in MaskValues.
230-
Err(Mask::Values(values))
229+
// We need to check for uniqueness twice, first for the `Arc` with `try_unwrap`,
230+
// then for the internal `BitBuffer` with `try_into_mut`.
231+
let owned_values = Arc::try_unwrap(values).map_err(Mask::Values)?;
232+
let bit_buffer = owned_values.into_buffer();
233+
let mut_buffer = bit_buffer.try_into_mut().map_err(Mask::from_buffer)?;
234+
235+
Ok(MaskMut(Inner::Builder(mut_buffer)))
231236
}
232237
}
233238
}
@@ -461,8 +466,6 @@ mod tests {
461466
}
462467

463468
#[test]
464-
// TODO(ngates): when mask uses BitBuffer internally, into_mut should succeed
465-
#[should_panic]
466469
fn test_round_trip_split_unsplit() {
467470
let mut original = MaskMut::with_capacity(20);
468471
// Pattern: 10 true, 10 false
@@ -531,4 +534,65 @@ mod tests {
531534
let frozen = mask.freeze();
532535
assert_eq!(frozen.true_count(), 30);
533536
}
537+
538+
#[test]
539+
fn test_try_into_mut_all_variants() {
540+
// Test AllTrue and AllFalse variants.
541+
let mask_true = Mask::new_true(100);
542+
let mut_mask_true = mask_true.try_into_mut().unwrap();
543+
assert_eq!(mut_mask_true.len(), 100);
544+
assert_eq!(mut_mask_true.freeze().true_count(), 100);
545+
546+
let mask_false = Mask::new_false(50);
547+
let mut_mask_false = mask_false.try_into_mut().unwrap();
548+
assert_eq!(mut_mask_false.len(), 50);
549+
assert_eq!(mut_mask_false.freeze().true_count(), 0);
550+
}
551+
552+
#[test]
553+
fn test_try_into_mut_with_references() {
554+
// Create a MaskValues variant.
555+
let mut mask_mut = MaskMut::with_capacity(10);
556+
mask_mut.append_n(true, 5);
557+
mask_mut.append_n(false, 5);
558+
let mask = mask_mut.freeze();
559+
560+
// Should succeed with unique reference (no clones).
561+
let mask2 = {
562+
let mut mask_mut2 = MaskMut::with_capacity(10);
563+
mask_mut2.append_n(true, 5);
564+
mask_mut2.append_n(false, 5);
565+
mask_mut2.freeze()
566+
};
567+
let result = mask2.try_into_mut();
568+
assert!(result.is_ok());
569+
assert_eq!(result.unwrap().len(), 10);
570+
571+
// Should fail with shared references.
572+
let _cloned = mask.clone();
573+
let result = mask.try_into_mut();
574+
assert!(result.is_err());
575+
if let Err(returned_mask) = result {
576+
assert_eq!(returned_mask.len(), 10);
577+
assert_eq!(returned_mask.true_count(), 5);
578+
}
579+
}
580+
581+
#[test]
582+
fn test_try_into_mut_round_trip() {
583+
// Test freeze -> try_into_mut -> modify -> freeze cycle.
584+
let mut original = MaskMut::with_capacity(20);
585+
original.append_n(true, 10);
586+
original.append_n(false, 10);
587+
588+
let frozen = original.freeze();
589+
assert_eq!(frozen.true_count(), 10);
590+
591+
let mut mut_mask = frozen.try_into_mut().unwrap();
592+
mut_mask.append_n(true, 5);
593+
assert_eq!(mut_mask.len(), 25);
594+
595+
let frozen_again = mut_mask.freeze();
596+
assert_eq!(frozen_again.true_count(), 15);
597+
}
534598
}

vortex-vector/src/bool/vector.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,15 @@ use crate::VectorOps;
1111

1212
/// An immutable vector of boolean values.
1313
///
14-
/// The mutable equivalent of this type is [`BoolVectorMut`].
14+
/// `BoolVector` can be considered a borrowed / frozen version of [`BoolVectorMut`], which is
15+
/// created via the [`freeze`](crate::VectorMutOps::freeze) method.
16+
///
17+
/// See the documentation for [`BoolVectorMut`] for more information.
1518
#[derive(Debug, Clone)]
1619
pub struct BoolVector {
20+
/// The bits that we use to represent booleans.
1721
pub(super) bits: BitBuffer,
22+
/// The validity mask (where `true` represents an element is **not** null).
1823
pub(super) validity: Mask,
1924
}
2025

vortex-vector/src/bool/vector_mut.rs

Lines changed: 174 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,62 @@ use crate::{VectorMutOps, VectorOps};
1111

1212
/// A mutable vector of boolean values.
1313
///
14-
/// The immutable equivalent of this type is [`BoolVector`].
14+
/// `BoolVectorMut` is the primary way to construct boolean vectors. It provides efficient methods
15+
/// for building vectors incrementally before converting them to an immutable [`BoolVector`] using
16+
/// the [`freeze`](crate::VectorMutOps::freeze) method.
17+
///
18+
/// # Examples
19+
///
20+
/// ## Extending and appending
21+
///
22+
/// ```
23+
/// use vortex_vector::{BoolVectorMut, VectorMutOps};
24+
///
25+
/// let mut vec1 = BoolVectorMut::from_iter([true, false].map(Some));
26+
/// let vec2 = BoolVectorMut::from_iter([true, true].map(Some)).freeze();
27+
///
28+
/// // Extend from another vector.
29+
/// vec1.extend_from_vector(&vec2);
30+
/// assert_eq!(vec1.len(), 4);
31+
///
32+
/// // Append null values.
33+
/// vec1.append_nulls(2);
34+
/// assert_eq!(vec1.len(), 6);
35+
/// ```
36+
///
37+
/// ## Splitting and unsplitting
38+
///
39+
/// ```
40+
/// use vortex_vector::{BoolVectorMut, VectorMutOps};
41+
///
42+
/// let mut vec = BoolVectorMut::from_iter([true, false, true, false, true].map(Some));
43+
///
44+
/// // Split the vector at index 3.
45+
/// let mut second_half = vec.split_off(3);
46+
/// assert_eq!(vec.len(), 3);
47+
/// assert_eq!(second_half.len(), 2);
48+
///
49+
/// // Rejoin the vectors.
50+
/// vec.unsplit(second_half);
51+
/// assert_eq!(vec.len(), 5);
52+
/// ```
53+
///
54+
/// ## Converting to immutable
55+
///
56+
/// ```
57+
/// use vortex_vector::{BoolVectorMut, VectorMutOps, VectorOps};
58+
///
59+
/// let mut vec = BoolVectorMut::from_iter([true, false, true].map(Some));
60+
///
61+
/// // Freeze into an immutable vector.
62+
/// let immutable = vec.freeze();
63+
/// assert_eq!(immutable.len(), 3);
64+
/// ```
1565
#[derive(Debug, Clone)]
1666
pub struct BoolVectorMut {
67+
/// The mutable bits that we use to represent booleans.
1768
pub(super) bits: BitBufferMut,
69+
/// The validity mask (where `true` represents an element is **not** null).
1870
pub(super) validity: MaskMut,
1971
}
2072

@@ -28,6 +80,76 @@ impl BoolVectorMut {
2880
}
2981
}
3082

83+
impl FromIterator<Option<bool>> for BoolVectorMut {
84+
/// Creates a new [`BoolVectorMut`] from an iterator of `Option<bool>` values.
85+
///
86+
/// `None` values will be marked as invalid in the validity mask.
87+
///
88+
/// # Examples
89+
///
90+
/// ```
91+
/// use vortex_vector::{BoolVectorMut, VectorMutOps};
92+
///
93+
/// let mut vec = BoolVectorMut::from_iter([Some(true), None, Some(false)]);
94+
/// assert_eq!(vec.len(), 3);
95+
/// ```
96+
fn from_iter<I>(iter: I) -> Self
97+
where
98+
I: IntoIterator<Item = Option<bool>>,
99+
{
100+
let iter = iter.into_iter();
101+
let (lower_bound, _) = iter.size_hint();
102+
103+
let mut bits = Vec::with_capacity(lower_bound);
104+
let mut validity = MaskMut::with_capacity(lower_bound);
105+
106+
for opt_val in iter {
107+
match opt_val {
108+
Some(val) => {
109+
bits.push(val);
110+
validity.append_n(true, 1);
111+
}
112+
None => {
113+
bits.push(false); // Value doesn't matter for invalid entries.
114+
validity.append_n(false, 1);
115+
}
116+
}
117+
}
118+
119+
BoolVectorMut {
120+
bits: BitBufferMut::from_iter(bits),
121+
validity,
122+
}
123+
}
124+
}
125+
126+
impl FromIterator<bool> for BoolVectorMut {
127+
/// Creates a new [`BoolVectorMut`] from an iterator of `bool` values.
128+
///
129+
/// All values will be treated as non-null.
130+
///
131+
/// # Examples
132+
///
133+
/// ```
134+
/// use vortex_vector::{BoolVectorMut, VectorMutOps};
135+
///
136+
/// let mut vec = BoolVectorMut::from_iter([true, false, false, true]);
137+
/// assert_eq!(vec.len(), 4);
138+
/// ```
139+
fn from_iter<I>(iter: I) -> Self
140+
where
141+
I: IntoIterator<Item = bool>,
142+
{
143+
let buffer = BitBufferMut::from_iter(iter);
144+
let validity = MaskMut::new_true(buffer.len());
145+
146+
BoolVectorMut {
147+
bits: buffer,
148+
validity,
149+
}
150+
}
151+
}
152+
31153
impl VectorMutOps for BoolVectorMut {
32154
type Immutable = BoolVector;
33155

@@ -75,3 +197,54 @@ impl VectorMutOps for BoolVectorMut {
75197
self.validity.unsplit(other.validity);
76198
}
77199
}
200+
201+
#[cfg(test)]
202+
mod tests {
203+
use super::*;
204+
205+
#[test]
206+
fn test_from_iter_with_options() {
207+
// Test FromIterator<Option<bool>> with nulls and empty.
208+
let vec_empty = BoolVectorMut::from_iter(std::iter::empty::<Option<bool>>());
209+
assert_eq!(vec_empty.len(), 0);
210+
211+
let vec = BoolVectorMut::from_iter([Some(true), None, Some(false), None, Some(true)]);
212+
assert_eq!(vec.len(), 5);
213+
let frozen = vec.freeze();
214+
assert_eq!(frozen.validity().true_count(), 3);
215+
}
216+
217+
#[test]
218+
fn test_from_iter_non_null() {
219+
// Test FromIterator<bool> creates all-valid vector.
220+
let vec = BoolVectorMut::from_iter([true, false, true, true, false]);
221+
assert_eq!(vec.len(), 5);
222+
let frozen = vec.freeze();
223+
assert_eq!(frozen.validity().true_count(), 5);
224+
}
225+
226+
#[test]
227+
fn test_operations_preserve_validity() {
228+
// Comprehensive test for split/unsplit/extend preserving validity.
229+
let mut vec = BoolVectorMut::from_iter([Some(true), None, Some(false), None, Some(true)]);
230+
231+
// Test split.
232+
let second_half = vec.split_off(2);
233+
assert_eq!(vec.len(), 2);
234+
assert_eq!(second_half.len(), 3);
235+
236+
// Test validity after split.
237+
let frozen_first = vec.freeze();
238+
assert_eq!(frozen_first.validity().true_count(), 1);
239+
let frozen_second = second_half.freeze();
240+
assert_eq!(frozen_second.validity().true_count(), 2);
241+
242+
// Test unsplit.
243+
let mut vec1 = BoolVectorMut::from_iter([Some(true), None]);
244+
let vec2 = BoolVectorMut::from_iter([Some(false), Some(true)]);
245+
vec1.unsplit(vec2);
246+
assert_eq!(vec1.len(), 4);
247+
let frozen = vec1.freeze();
248+
assert_eq!(frozen.validity().true_count(), 3);
249+
}
250+
}

0 commit comments

Comments
 (0)