Skip to content

Commit ebcfa20

Browse files
authored
Docs for vortex-scan (#2065)
Part of #1905
1 parent 4d82c2e commit ebcfa20

File tree

3 files changed

+19
-8
lines changed

3 files changed

+19
-8
lines changed

vortex-scan/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
//! The `vortex-scan` crate provides utilities for performing efficient scan operations.
2+
//!
3+
//! The [`Scanner`] object is responsible for storing state related to a scan operation, including
4+
//! expression selectivity metrics, in order to continually optimize the execution plan for each
5+
//! row-range of the scan.
6+
#![deny(missing_docs)]
17
mod range_scan;
28
mod row_mask;
39

vortex-scan/src/range_scan.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use vortex_mask::Mask;
1010

1111
use crate::{RowMask, Scanner};
1212

13+
/// A scan operation defined for a single row-range of the columnar data.
1314
pub struct RangeScanner {
1415
scan: Arc<Scanner>,
1516
row_range: Range<u64>,
@@ -26,6 +27,8 @@ enum State {
2627
Ready(Option<ArrayData>),
2728
}
2829

30+
/// The next operation that should be performed. Either an expression to run, or the result
31+
/// of the [`RangeScanner`].
2932
pub enum NextOp {
3033
/// The finished result of the scan.
3134
Ready(Option<ArrayData>),

vortex-scan/src/row_mask.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ use vortex_mask::Mask;
1414

1515
/// A RowMask captures a set of selected rows within a range.
1616
///
17-
/// The range itself can be [`u64`], but the length of the range must fit into a [`usize`].
17+
/// The range itself can be [`u64`], but the length of the range must fit into a [`usize`]; this
18+
/// allows us to use a `usize` filter mask within a much larger file.
1819
#[derive(Debug, Clone)]
1920
pub struct RowMask {
2021
mask: Mask,
@@ -37,6 +38,7 @@ impl Display for RowMask {
3738
}
3839

3940
impl RowMask {
41+
/// Define a new [`RowMask`] with the given mask and offset into the file.
4042
pub fn new(mask: Mask, begin: u64) -> Self {
4143
let end = begin + (mask.len() as u64);
4244
Self { mask, begin, end }
@@ -161,6 +163,7 @@ impl RowMask {
161163
}
162164
}
163165

166+
/// Perform an intersection with another [`RowMask`], returning only rows that appear in both.
164167
pub fn and_rowmask(self, other: RowMask) -> VortexResult<Self> {
165168
if other.true_count() == other.len() {
166169
return Ok(self);
@@ -204,21 +207,20 @@ impl RowMask {
204207
Ok(Self::new(output_mask, output_begin))
205208
}
206209

207-
#[inline]
208-
pub fn is_all_false(&self) -> bool {
209-
self.mask.true_count() == 0
210-
}
211-
210+
/// The beginning of the masked range.
212211
#[inline]
213212
pub fn begin(&self) -> u64 {
214213
self.begin
215214
}
216215

216+
/// The end of the masked range.
217217
#[inline]
218218
pub fn end(&self) -> u64 {
219219
self.end
220220
}
221221

222+
/// The length of the mask is the number of possible rows between the `begin` and `end`,
223+
/// regardless of how many appear in the mask. For the number of masked rows, see `true_count`.
222224
#[inline]
223225
// There is no good definition of is_empty: does it mean len == 0 or true_count == 0?
224226
#[allow(clippy::len_without_is_empty)]
@@ -306,7 +308,7 @@ impl RowMask {
306308
Ok(RowMask::new(self.mask, self.begin - offset))
307309
}
308310

309-
// Get the true count of the underlying mask.
311+
/// The number of selected rows (true values in the mask) within the range.
310312
pub fn true_count(&self) -> usize {
311313
self.mask.true_count()
312314
}
@@ -413,7 +415,7 @@ mod tests {
413415

414416
assert_eq!(output.begin, 0);
415417
assert_eq!(output.end, 20);
416-
assert!(output.is_all_false());
418+
assert_eq!(output.true_count(), 0);
417419
}
418420

419421
#[test]

0 commit comments

Comments
 (0)