Skip to content

Commit e7ebc13

Browse files
bors[bot] and Ten0 authored
Merge #372
372: Implement missing specializations on the PutBack adaptor and on the MergeJoinBy Iterator r=jswrenn a=Ten0 Resolves #371 `count`, `last` and `nth` of the `MergeJoinBy` iterator are made faster once one of the two input iterators is completely consumed, by directly calling the corresponding method of the only underlying iterator left (this is beneficial if the underlying iterator also specializes these methods). This is particularly useful when you want to count the number of distinct elements in the union of two sorted, known-size iterators (`count`). These methods are also specialized on the `PutBack` adaptor for the same performance reasons. The `nth` specialization on the `MergeJoinBy` iterator relies on the `nth` specialization of the `PutBack` adaptor working correctly. Co-authored-by: Thomas BESSOU <[email protected]>
2 parents c620ae8 + 8bae261 commit e7ebc13

File tree

3 files changed

+250
-4
lines changed

3 files changed

+250
-4
lines changed

src/adaptors/mod.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,28 @@ impl<I> Iterator for PutBack<I>
233233
size_hint::add_scalar(self.iter.size_hint(), self.top.is_some() as usize)
234234
}
235235

236+
fn count(self) -> usize {
237+
self.iter.count() + (self.top.is_some() as usize)
238+
}
239+
240+
/// Returns the final item of the adaptor.
///
/// The put-back value comes first in iteration order, so it is the answer
/// only when the wrapped iterator has nothing left to yield.
fn last(self) -> Option<Self::Item> {
    match self.iter.last() {
        Some(item) => Some(item),
        None => self.top,
    }
}
243+
244+
/// Skips `n` items and returns the next one.
///
/// A put-back value counts as the first item: it is returned when `n == 0`,
/// otherwise it is discarded and the wrapped iterator is asked for its
/// `n - 1`-th item.
fn nth(&mut self, n: usize) -> Option<Self::Item> {
    let pending = self.top.take();
    match pending {
        None => self.iter.nth(n),
        Some(item) => {
            if n == 0 {
                Some(item)
            } else {
                // The put-back value is one of the skipped items.
                drop(item);
                self.iter.nth(n - 1)
            }
        }
    }
}
257+
236258
fn all<G>(&mut self, mut f: G) -> bool
237259
where G: FnMut(Self::Item) -> bool
238260
{

src/merge_join.rs

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ pub struct MergeJoinBy<I: Iterator, J: Iterator, F> {
3232
}
3333

3434
impl<I, J, F> Clone for MergeJoinBy<I, J, F>
35-
where I: Clone + Iterator,
36-
I::Item: Clone,
37-
J: Clone + Iterator,
38-
J::Item: Clone,
35+
where I: Iterator,
36+
J: Iterator,
37+
PutBack<Fuse<I>>: Clone,
38+
PutBack<Fuse<J>>: Clone,
3939
F: Clone,
4040
{
4141
clone_fields!(left, right, cmp_fn);
@@ -94,4 +94,74 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
9494

9595
(lower, upper)
9696
}
97+
98+
fn count(mut self) -> usize {
99+
let mut count = 0;
100+
loop {
101+
match (self.left.next(), self.right.next()) {
102+
(None, None) => break count,
103+
(Some(_left), None) => break count + 1 + self.left.into_parts().1.count(),
104+
(None, Some(_right)) => break count + 1 + self.right.into_parts().1.count(),
105+
(Some(left), Some(right)) => {
106+
count += 1;
107+
match (self.cmp_fn)(&left, &right) {
108+
Ordering::Equal => {}
109+
Ordering::Less => self.right.put_back(right),
110+
Ordering::Greater => self.left.put_back(left),
111+
}
112+
}
113+
}
114+
}
115+
}
116+
117+
fn last(mut self) -> Option<Self::Item> {
118+
let mut previous_element = None;
119+
loop {
120+
match (self.left.next(), self.right.next()) {
121+
(None, None) => break previous_element,
122+
(Some(left), None) => {
123+
break Some(EitherOrBoth::Left(
124+
self.left.into_parts().1.last().unwrap_or(left),
125+
))
126+
}
127+
(None, Some(right)) => {
128+
break Some(EitherOrBoth::Right(
129+
self.right.into_parts().1.last().unwrap_or(right),
130+
))
131+
}
132+
(Some(left), Some(right)) => {
133+
previous_element = match (self.cmp_fn)(&left, &right) {
134+
Ordering::Equal => Some(EitherOrBoth::Both(left, right)),
135+
Ordering::Less => {
136+
self.right.put_back(right);
137+
Some(EitherOrBoth::Left(left))
138+
}
139+
Ordering::Greater => {
140+
self.left.put_back(left);
141+
Some(EitherOrBoth::Right(right))
142+
}
143+
}
144+
}
145+
}
146+
}
147+
}
148+
149+
fn nth(&mut self, mut n: usize) -> Option<Self::Item> {
150+
loop {
151+
if n == 0 {
152+
break self.next();
153+
}
154+
n -= 1;
155+
match (self.left.next(), self.right.next()) {
156+
(None, None) => break None,
157+
(Some(_left), None) => break self.left.nth(n).map(EitherOrBoth::Left),
158+
(None, Some(_right)) => break self.right.nth(n).map(EitherOrBoth::Right),
159+
(Some(left), Some(right)) => match (self.cmp_fn)(&left, &right) {
160+
Ordering::Equal => {}
161+
Ordering::Less => self.right.put_back(right),
162+
Ordering::Greater => self.left.put_back(left),
163+
},
164+
}
165+
}
166+
}
97167
}

tests/specializations.rs

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
extern crate itertools;
2+
3+
#[macro_use]
4+
extern crate quickcheck;
5+
6+
use itertools::{EitherOrBoth, Itertools};
7+
8+
use std::fmt::Debug;
9+
use std::ops::BitXor;
10+
11+
/// Iterator wrapper that forwards only `next` and `size_hint`.
///
/// Every other `Iterator` method falls back to the trait's default
/// implementation, which makes this a reference to compare specialized
/// implementations against.
struct Unspecialized<I>(I);

impl<I: Iterator> Iterator for Unspecialized<I> {
    type Item = <I as Iterator>::Item;

    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        let Unspecialized(ref mut inner) = *self;
        inner.next()
    }

    #[inline(always)]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let Unspecialized(ref inner) = *self;
        inner.size_hint()
    }
}
28+
29+
fn check_specialized<'a, V, IterItem, Iter, F>(iterator: &Iter, mapper: F)
30+
where
31+
V: Eq + Debug,
32+
IterItem: 'a,
33+
Iter: Iterator<Item = IterItem> + Clone + 'a,
34+
F: Fn(Box<Iterator<Item = IterItem> + 'a>) -> V,
35+
{
36+
assert_eq!(
37+
mapper(Box::new(Unspecialized(iterator.clone()))),
38+
mapper(Box::new(iterator.clone()))
39+
)
40+
}
41+
42+
fn check_specialized_count_last_nth_sizeh<'a, IterItem, Iter>(
43+
it: &Iter,
44+
known_expected_size: Option<usize>,
45+
) where
46+
IterItem: 'a + Eq + Debug,
47+
Iter: Iterator<Item = IterItem> + Clone + 'a,
48+
{
49+
let size = it.clone().count();
50+
if let Some(expected_size) = known_expected_size {
51+
assert_eq!(size, expected_size);
52+
}
53+
check_specialized(it, |i| i.count());
54+
check_specialized(it, |i| i.last());
55+
for n in 0..size + 2 {
56+
check_specialized(it, |mut i| i.nth(n));
57+
}
58+
let mut it_sh = it.clone();
59+
for n in 0..size + 2 {
60+
let len = it_sh.clone().count();
61+
let (min, max) = it_sh.size_hint();
62+
assert_eq!((size - n.min(size)), len);
63+
assert!(min <= len);
64+
if let Some(max) = max {
65+
assert!(len <= max);
66+
}
67+
it_sh.next();
68+
}
69+
}
70+
71+
fn check_specialized_fold_xor<'a, IterItem, Iter>(it: &Iter)
72+
where
73+
IterItem: 'a
74+
+ BitXor
75+
+ Eq
76+
+ Debug
77+
+ BitXor<<IterItem as BitXor>::Output, Output = <IterItem as BitXor>::Output>
78+
+ Clone,
79+
<IterItem as BitXor>::Output:
80+
BitXor<Output = <IterItem as BitXor>::Output> + Eq + Debug + Clone,
81+
Iter: Iterator<Item = IterItem> + Clone + 'a,
82+
{
83+
check_specialized(it, |mut i| {
84+
let first = i.next().map(|f| f.clone() ^ (f.clone() ^ f));
85+
i.fold(first, |acc, v: IterItem| acc.map(move |a| v ^ a))
86+
});
87+
}
88+
89+
fn put_back_test(test_vec: Vec<i32>, known_expected_size: Option<usize>) {
90+
{
91+
// Lexical lifetimes support
92+
let pb = itertools::put_back(test_vec.iter());
93+
check_specialized_count_last_nth_sizeh(&pb, known_expected_size);
94+
check_specialized_fold_xor(&pb);
95+
}
96+
97+
let mut pb = itertools::put_back(test_vec.into_iter());
98+
pb.put_back(1);
99+
check_specialized_count_last_nth_sizeh(&pb, known_expected_size.map(|x| x + 1));
100+
check_specialized_fold_xor(&pb)
101+
}
102+
103+
/// Fixed-input check of the `PutBack` specializations with a known length.
#[test]
fn put_back() {
    let sample = vec![7, 4, 1];
    let expected_len = Some(3);
    put_back_test(sample, expected_len);
}
107+
108+
// Property-based variant of the `put_back` test: the input vector is
// supplied by quickcheck, so no expected length is known and `None` is passed.
quickcheck! {
109+
fn put_back_qc(test_vec: Vec<i32>) -> () {
110+
put_back_test(test_vec, None)
111+
}
112+
}
113+
114+
fn merge_join_by_test(i1: Vec<usize>, i2: Vec<usize>, known_expected_size: Option<usize>) {
115+
let i1 = i1.into_iter();
116+
let i2 = i2.into_iter();
117+
let mjb = i1.clone().merge_join_by(i2.clone(), std::cmp::Ord::cmp);
118+
check_specialized_count_last_nth_sizeh(&mjb, known_expected_size);
119+
// Rust 1.24 compatibility:
120+
fn eob_left_z(eob: EitherOrBoth<usize, usize>) -> usize {
121+
eob.left().unwrap_or(0)
122+
}
123+
fn eob_right_z(eob: EitherOrBoth<usize, usize>) -> usize {
124+
eob.left().unwrap_or(0)
125+
}
126+
fn eob_both_z(eob: EitherOrBoth<usize, usize>) -> usize {
127+
let (a, b) = eob.both().unwrap_or((0, 0));
128+
assert_eq!(a, b);
129+
a
130+
}
131+
check_specialized_fold_xor(&mjb.clone().map(eob_left_z));
132+
check_specialized_fold_xor(&mjb.clone().map(eob_right_z));
133+
check_specialized_fold_xor(&mjb.clone().map(eob_both_z));
134+
135+
// And the other way around
136+
let mjb = i2.merge_join_by(i1, std::cmp::Ord::cmp);
137+
check_specialized_count_last_nth_sizeh(&mjb, known_expected_size);
138+
check_specialized_fold_xor(&mjb.clone().map(eob_left_z));
139+
check_specialized_fold_xor(&mjb.clone().map(eob_right_z));
140+
check_specialized_fold_xor(&mjb.clone().map(eob_both_z));
141+
}
142+
143+
/// Fixed-input check of the `MergeJoinBy` specializations.
///
/// The inputs hold 6 and 4 items and share two values (3 and 5), so the
/// merge is expected to yield 8 items.
#[test]
fn merge_join_by() {
    merge_join_by_test(vec![1, 3, 5, 7, 8, 9], vec![0, 3, 4, 5], Some(8));
}
149+
150+
// Property-based variant of the `merge_join_by` test: both input vectors are
// supplied by quickcheck, so no expected length is known and `None` is passed.
quickcheck! {
151+
fn merge_join_by_qc(i1: Vec<usize>, i2: Vec<usize>) -> () {
152+
merge_join_by_test(i1, i2, None)
153+
}
154+
}

0 commit comments

Comments
 (0)