Skip to content

Commit 2818b02

Browse files
committed
add constructors and validation to list vector
Signed-off-by: Connor Tsui <[email protected]>
1 parent 7bf36e4 commit 2818b02

File tree

7 files changed

+409
-113
lines changed

7 files changed

+409
-113
lines changed

vortex-buffer/src/bit/buf_mut.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
use std::ops::Not;
55

6-
use arrow_buffer::bit_chunk_iterator::BitChunks;
6+
use arrow_buffer::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
77
use bitvec::view::BitView;
88

99
use crate::bit::{get_bit_unchecked, ops, set_bit_unchecked, unset_bit_unchecked};
@@ -495,6 +495,21 @@ impl BitBufferMut {
495495
pub fn as_mut_ptr(&mut self) -> *mut u8 {
496496
self.buffer.as_mut_ptr()
497497
}
498+
499+
/// Access chunks of the buffer aligned to an 8-byte boundary as [prefix, \<full chunks\>, suffix].
500+
pub fn unaligned_chunks(&self) -> UnalignedBitChunk<'_> {
501+
UnalignedBitChunk::new(self.buffer.as_slice(), self.offset, self.len)
502+
}
503+
504+
/// Get the number of set bits in the buffer.
505+
pub fn true_count(&self) -> usize {
506+
self.unaligned_chunks().count_ones()
507+
}
508+
509+
/// Get the number of unset bits in the buffer.
510+
pub fn false_count(&self) -> usize {
511+
self.len - self.true_count()
512+
}
498513
}
499514

500515
impl Default for BitBufferMut {

vortex-mask/src/lib.rs

Lines changed: 87 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -123,93 +123,6 @@ pub struct MaskValues {
123123
density: f64,
124124
}
125125

126-
impl MaskValues {
127-
/// Returns the length of the mask.
128-
#[inline]
129-
pub fn len(&self) -> usize {
130-
self.buffer.len()
131-
}
132-
133-
/// Returns true if the mask is empty, i.e., its length is 0.
134-
#[inline]
135-
pub fn is_empty(&self) -> bool {
136-
self.buffer.is_empty()
137-
}
138-
139-
/// Returns the true count of the mask.
140-
#[inline]
141-
pub fn true_count(&self) -> usize {
142-
self.true_count
143-
}
144-
145-
/// Returns the boolean buffer representation of the mask.
146-
#[inline]
147-
pub fn bit_buffer(&self) -> &BitBuffer {
148-
&self.buffer
149-
}
150-
151-
/// Returns the boolean value at a given index.
152-
#[inline]
153-
pub fn value(&self, index: usize) -> bool {
154-
self.buffer.value(index)
155-
}
156-
157-
/// Constructs an indices vector from one of the other representations.
158-
pub fn indices(&self) -> &[usize] {
159-
self.indices.get_or_init(|| {
160-
if self.true_count == 0 {
161-
return vec![];
162-
}
163-
164-
if self.true_count == self.len() {
165-
return (0..self.len()).collect();
166-
}
167-
168-
if let Some(slices) = self.slices.get() {
169-
let mut indices = Vec::with_capacity(self.true_count);
170-
indices.extend(slices.iter().flat_map(|(start, end)| *start..*end));
171-
debug_assert!(indices.is_sorted());
172-
assert_eq!(indices.len(), self.true_count);
173-
return indices;
174-
}
175-
176-
let mut indices = Vec::with_capacity(self.true_count);
177-
indices.extend(self.buffer.set_indices());
178-
debug_assert!(indices.is_sorted());
179-
assert_eq!(indices.len(), self.true_count);
180-
indices
181-
})
182-
}
183-
184-
/// Constructs a slices vector from one of the other representations.
185-
#[allow(clippy::cast_possible_truncation)]
186-
#[inline]
187-
pub fn slices(&self) -> &[(usize, usize)] {
188-
self.slices.get_or_init(|| {
189-
if self.true_count == self.len() {
190-
return vec![(0, self.len())];
191-
}
192-
193-
self.buffer.set_slices().collect()
194-
})
195-
}
196-
197-
/// Return an iterator over either indices or slices of the mask based on a density threshold.
198-
#[inline]
199-
pub fn threshold_iter(&self, threshold: f64) -> MaskIter<'_> {
200-
if self.density >= threshold {
201-
MaskIter::Slices(self.slices())
202-
} else {
203-
MaskIter::Indices(self.indices())
204-
}
205-
}
206-
207-
/// Extracts the internal [`BitBuffer`].
208-
pub(crate) fn into_buffer(self) -> BitBuffer {
209-
self.buffer
210-
}
211-
}
212-
213126
impl Mask {
214127
/// Create a new Mask where all values are set.
215128
#[inline]
@@ -642,6 +555,93 @@ impl Mask {
642555
}
643556
}
644557

558+
impl MaskValues {
559+
/// Returns the length of the mask.
560+
#[inline]
561+
pub fn len(&self) -> usize {
562+
self.buffer.len()
563+
}
564+
565+
/// Returns true if the mask is empty, i.e., its length is 0.
566+
#[inline]
567+
pub fn is_empty(&self) -> bool {
568+
self.buffer.is_empty()
569+
}
570+
571+
/// Returns the true count of the mask.
572+
#[inline]
573+
pub fn true_count(&self) -> usize {
574+
self.true_count
575+
}
576+
577+
/// Returns the boolean buffer representation of the mask.
578+
#[inline]
579+
pub fn bit_buffer(&self) -> &BitBuffer {
580+
&self.buffer
581+
}
582+
583+
/// Returns the boolean value at a given index.
584+
#[inline]
585+
pub fn value(&self, index: usize) -> bool {
586+
self.buffer.value(index)
587+
}
588+
589+
/// Constructs an indices vector from one of the other representations.
590+
pub fn indices(&self) -> &[usize] {
591+
self.indices.get_or_init(|| {
592+
if self.true_count == 0 {
593+
return vec![];
594+
}
595+
596+
if self.true_count == self.len() {
597+
return (0..self.len()).collect();
598+
}
599+
600+
if let Some(slices) = self.slices.get() {
601+
let mut indices = Vec::with_capacity(self.true_count);
602+
indices.extend(slices.iter().flat_map(|(start, end)| *start..*end));
603+
debug_assert!(indices.is_sorted());
604+
assert_eq!(indices.len(), self.true_count);
605+
return indices;
606+
}
607+
608+
let mut indices = Vec::with_capacity(self.true_count);
609+
indices.extend(self.buffer.set_indices());
610+
debug_assert!(indices.is_sorted());
611+
assert_eq!(indices.len(), self.true_count);
612+
indices
613+
})
614+
}
615+
616+
/// Constructs a slices vector from one of the other representations.
617+
#[allow(clippy::cast_possible_truncation)]
618+
#[inline]
619+
pub fn slices(&self) -> &[(usize, usize)] {
620+
self.slices.get_or_init(|| {
621+
if self.true_count == self.len() {
622+
return vec![(0, self.len())];
623+
}
624+
625+
self.buffer.set_slices().collect()
626+
})
627+
}
628+
629+
/// Return an iterator over either indices or slices of the mask based on a density threshold.
630+
#[inline]
631+
pub fn threshold_iter(&self, threshold: f64) -> MaskIter<'_> {
632+
if self.density >= threshold {
633+
MaskIter::Slices(self.slices())
634+
} else {
635+
MaskIter::Indices(self.indices())
636+
}
637+
}
638+
639+
/// Extracts the internal [`BitBuffer`].
640+
pub(crate) fn into_buffer(self) -> BitBuffer {
641+
self.buffer
642+
}
643+
}
644+
645645
/// Iterator over the indices or slices of a mask.
646646
pub enum MaskIter<'a> {
647647
/// Slice of pre-cached indices of a mask.

vortex-mask/src/mask_mut.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,24 @@ impl MaskMut {
279279
pub fn is_empty(&self) -> bool {
280280
self.len() == 0
281281
}
282+
283+
/// Returns true if all values in the mask are true.
284+
pub fn all_true(&self) -> bool {
285+
match &self.0 {
286+
Inner::Empty { .. } => true,
287+
Inner::Constant { value, .. } => *value,
288+
Inner::Builder(bits) => bits.true_count() == bits.len(),
289+
}
290+
}
291+
292+
/// Returns true if all values in the mask are false.
293+
pub fn all_false(&self) -> bool {
294+
match &self.0 {
295+
Inner::Empty { .. } => true,
296+
Inner::Constant { value, .. } => !*value,
297+
Inner::Builder(bits) => !bits.is_empty() && bits.true_count() == 0,
298+
}
299+
}
282300
}
283301

284302
impl Mask {

0 commit comments

Comments
 (0)