Skip to content

Commit 75c7e8d

Browse files
authored
chore: use RowSelection::union from arrow-rs (#953)
Fixes #605
1 parent c21b73e commit 75c7e8d

File tree

1 file changed

+3
-120
lines changed

1 file changed

+3
-120
lines changed

crates/iceberg/src/expr/visitors/page_index_evaluator.rs

Lines changed: 3 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ impl BoundPredicateVisitor for PageIndexEvaluator<'_> {
425425
}
426426

427427
fn or(&mut self, lhs: RowSelection, rhs: RowSelection) -> Result<RowSelection> {
428-
Ok(union_row_selections(&lhs, &rhs))
428+
Ok(lhs.union(&rhs))
429429
}
430430

431431
fn not(&mut self, _: RowSelection) -> Result<RowSelection> {
@@ -772,99 +772,12 @@ impl BoundPredicateVisitor for PageIndexEvaluator<'_> {
772772
}
773773
}
774774

775-
/// Combine two lists of `RowSelection` return the union of them
776-
/// For example:
777-
/// self: NNYYYYNNYYNYN
778-
/// other: NYNNNNNNY
779-
///
780-
/// returned: NYYYYYNNYYNYN
781-
///
782-
/// This can be removed from here once RowSelection::union is in parquet::arrow
783-
/// (Hopefully once https://github.com/apache/arrow-rs/pull/6308 gets merged)
784-
fn union_row_selections(left: &RowSelection, right: &RowSelection) -> RowSelection {
785-
let mut l_iter = left.iter().copied().peekable();
786-
let mut r_iter = right.iter().copied().peekable();
787-
788-
let iter = std::iter::from_fn(move || {
789-
loop {
790-
let l = l_iter.peek_mut();
791-
let r = r_iter.peek_mut();
792-
793-
match (l, r) {
794-
(Some(a), _) if a.row_count == 0 => {
795-
l_iter.next().unwrap();
796-
}
797-
(_, Some(b)) if b.row_count == 0 => {
798-
r_iter.next().unwrap();
799-
}
800-
(Some(l), Some(r)) => {
801-
return match (l.skip, r.skip) {
802-
// Skip both ranges
803-
(true, true) => {
804-
if l.row_count < r.row_count {
805-
let skip = l.row_count;
806-
r.row_count -= l.row_count;
807-
l_iter.next();
808-
Some(RowSelector::skip(skip))
809-
} else {
810-
let skip = r.row_count;
811-
l.row_count -= skip;
812-
r_iter.next();
813-
Some(RowSelector::skip(skip))
814-
}
815-
}
816-
// Keep rows from left
817-
(false, true) => {
818-
if l.row_count < r.row_count {
819-
r.row_count -= l.row_count;
820-
l_iter.next()
821-
} else {
822-
let r_row_count = r.row_count;
823-
l.row_count -= r_row_count;
824-
r_iter.next();
825-
Some(RowSelector::select(r_row_count))
826-
}
827-
}
828-
// Keep rows from right
829-
(true, false) => {
830-
if l.row_count < r.row_count {
831-
let l_row_count = l.row_count;
832-
r.row_count -= l_row_count;
833-
l_iter.next();
834-
Some(RowSelector::select(l_row_count))
835-
} else {
836-
l.row_count -= r.row_count;
837-
r_iter.next()
838-
}
839-
}
840-
// Keep at least one
841-
_ => {
842-
if l.row_count < r.row_count {
843-
r.row_count -= l.row_count;
844-
l_iter.next()
845-
} else {
846-
l.row_count -= r.row_count;
847-
r_iter.next()
848-
}
849-
}
850-
};
851-
}
852-
(Some(_), None) => return l_iter.next(),
853-
(None, Some(_)) => return r_iter.next(),
854-
(None, None) => return None,
855-
}
856-
}
857-
});
858-
859-
iter.collect()
860-
}
861-
862775
#[cfg(test)]
863776
mod tests {
864777
use std::collections::HashMap;
865778
use std::sync::Arc;
866779

867-
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
780+
use parquet::arrow::arrow_reader::RowSelector;
868781
use parquet::basic::{LogicalType as ParquetLogicalType, Type as ParquetPhysicalType};
869782
use parquet::data_type::ByteArray;
870783
use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData};
@@ -877,41 +790,11 @@ mod tests {
877790
};
878791
use rand::{thread_rng, Rng};
879792

880-
use super::{union_row_selections, PageIndexEvaluator};
793+
use super::PageIndexEvaluator;
881794
use crate::expr::{Bind, Reference};
882795
use crate::spec::{Datum, NestedField, PrimitiveType, Schema, Type};
883796
use crate::{ErrorKind, Result};
884797

885-
#[test]
886-
fn test_union_row_selections() {
887-
let selection = RowSelection::from(vec![RowSelector::select(1048576)]);
888-
let result = union_row_selections(&selection, &selection);
889-
assert_eq!(result, selection);
890-
891-
// NYNYY
892-
let a = RowSelection::from(vec![
893-
RowSelector::skip(10),
894-
RowSelector::select(10),
895-
RowSelector::skip(10),
896-
RowSelector::select(20),
897-
]);
898-
899-
// NNYYN
900-
let b = RowSelection::from(vec![
901-
RowSelector::skip(20),
902-
RowSelector::select(20),
903-
RowSelector::skip(10),
904-
]);
905-
906-
let result = union_row_selections(&a, &b);
907-
908-
// NYYYY
909-
assert_eq!(result.iter().collect::<Vec<_>>(), vec![
910-
&RowSelector::skip(10),
911-
&RowSelector::select(40)
912-
]);
913-
}
914-
915798
#[test]
916799
fn eval_matches_no_rows_for_empty_row_group() -> Result<()> {
917800
let row_group_metadata = create_row_group_metadata(0, 0, None, 0, None)?;

0 commit comments

Comments
 (0)