Skip to content

Commit 4f0dcd8

Browse files
committed
Add duplicates_[by_]with_hasher()
1 parent f80883b commit 4f0dcd8

File tree

3 files changed

+99
-25
lines changed

3 files changed

+99
-25
lines changed

src/duplicates_impl.rs

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,37 @@
1-
use std::hash::Hash;
1+
use core::hash::BuildHasher;
2+
use std::hash::{Hash, RandomState};
23

34
mod private {
5+
use core::hash::BuildHasher;
46
use std::collections::HashMap;
57
use std::fmt;
68
use std::hash::Hash;
79

810
#[derive(Clone)]
911
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
10-
pub struct DuplicatesBy<I: Iterator, Key, F> {
12+
pub struct DuplicatesBy<I: Iterator, Key, F, S>
13+
where
14+
S: BuildHasher,
15+
{
1116
pub(crate) iter: I,
12-
pub(crate) meta: Meta<Key, F>,
17+
pub(crate) meta: Meta<Key, F, S>,
1318
}
1419

15-
impl<I, V, F> fmt::Debug for DuplicatesBy<I, V, F>
20+
impl<I, V, F, S> fmt::Debug for DuplicatesBy<I, V, F, S>
1621
where
1722
I: Iterator + fmt::Debug,
1823
V: fmt::Debug + Hash + Eq,
24+
S: BuildHasher,
1925
{
2026
debug_fmt_fields!(DuplicatesBy, iter, meta.used);
2127
}
2228

23-
impl<I: Iterator, Key: Eq + Hash, F> DuplicatesBy<I, Key, F> {
24-
pub(crate) fn new(iter: I, key_method: F) -> Self {
29+
impl<I: Iterator, Key: Eq + Hash, F, S: BuildHasher> DuplicatesBy<I, Key, F, S> {
30+
pub(crate) fn new(iter: I, key_method: F, hash_builder: S) -> Self {
2531
Self {
2632
iter,
2733
meta: Meta {
28-
used: HashMap::new(),
34+
used: HashMap::with_hasher(hash_builder),
2935
pending: 0,
3036
key_method,
3137
},
@@ -34,15 +40,16 @@ mod private {
3440
}
3541

3642
#[derive(Clone)]
37-
pub struct Meta<Key, F> {
38-
used: HashMap<Key, bool>,
43+
pub struct Meta<Key, F, S> {
44+
used: HashMap<Key, bool, S>,
3945
pending: usize,
4046
key_method: F,
4147
}
4248

43-
impl<Key, F> Meta<Key, F>
49+
impl<Key, F, S> Meta<Key, F, S>
4450
where
4551
Key: Eq + Hash,
52+
S: BuildHasher,
4653
{
4754
/// Takes an item and returns it back to the caller if it's the second time we see it.
4855
/// Otherwise the item is consumed and `None` is returned.
@@ -68,11 +75,12 @@ mod private {
6875
}
6976
}
7077

71-
impl<I, Key, F> Iterator for DuplicatesBy<I, Key, F>
78+
impl<I, Key, F, S> Iterator for DuplicatesBy<I, Key, F, S>
7279
where
7380
I: Iterator,
7481
Key: Eq + Hash,
7582
F: KeyMethod<Key, I::Item>,
83+
S: BuildHasher,
7684
{
7785
type Item = I::Item;
7886

@@ -102,11 +110,12 @@ mod private {
102110
}
103111
}
104112

105-
impl<I, Key, F> DoubleEndedIterator for DuplicatesBy<I, Key, F>
113+
impl<I, Key, F, S> DoubleEndedIterator for DuplicatesBy<I, Key, F, S>
106114
where
107115
I: DoubleEndedIterator,
108116
Key: Eq + Hash,
109117
F: KeyMethod<Key, I::Item>,
118+
S: BuildHasher,
110119
{
111120
fn next_back(&mut self) -> Option<Self::Item> {
112121
let Self { iter, meta } = self;
@@ -189,28 +198,35 @@ mod private {
189198
/// An iterator adapter to filter for duplicate elements.
190199
///
191200
/// See [`.duplicates_by()`](crate::Itertools::duplicates_by) for more information.
192-
pub type DuplicatesBy<I, V, F> = private::DuplicatesBy<I, V, private::ByFn<F>>;
193-
194-
/// Create a new `DuplicatesBy` iterator.
195-
pub fn duplicates_by<I, Key, F>(iter: I, f: F) -> DuplicatesBy<I, Key, F>
201+
pub type DuplicatesBy<I, V, F, S = RandomState> = private::DuplicatesBy<I, V, private::ByFn<F>, S>;
202+
203+
/// Create a new `DuplicatesBy` iterator with a specified hash builder.
204+
pub fn duplicates_by_with_hasher<I, Key, F, S>(
205+
iter: I,
206+
f: F,
207+
hash_builder: S,
208+
) -> DuplicatesBy<I, Key, F, S>
196209
where
197210
Key: Eq + Hash,
198211
F: FnMut(&I::Item) -> Key,
199212
I: Iterator,
213+
S: BuildHasher,
200214
{
201-
DuplicatesBy::new(iter, private::ByFn(f))
215+
DuplicatesBy::new(iter, private::ByFn(f), hash_builder)
202216
}
203217

204218
/// An iterator adapter to filter out duplicate elements.
205219
///
206220
/// See [`.duplicates()`](crate::Itertools::duplicates) for more information.
207-
pub type Duplicates<I> = private::DuplicatesBy<I, <I as Iterator>::Item, private::ById>;
221+
pub type Duplicates<I, S = RandomState> =
222+
private::DuplicatesBy<I, <I as Iterator>::Item, private::ById, S>;
208223

209-
/// Create a new `Duplicates` iterator.
210-
pub fn duplicates<I>(iter: I) -> Duplicates<I>
224+
/// Create a new `Duplicates` iterator with a specified hash builder.
225+
pub fn duplicates_with_hasher<I, S>(iter: I, hash_builder: S) -> Duplicates<I, S>
211226
where
212227
I: Iterator,
213228
I::Item: Eq + Hash,
229+
S: BuildHasher,
214230
{
215-
Duplicates::new(iter, private::ById)
231+
Duplicates::new(iter, private::ById, hash_builder)
216232
}

src/lib.rs

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ use alloc::{collections::VecDeque, string::String, vec::Vec};
6363
pub use either::Either;
6464

6565
use core::borrow::Borrow;
66+
use core::hash::BuildHasher;
6667
use std::cmp::Ordering;
6768
#[cfg(feature = "use_std")]
6869
use std::collections::HashMap;
@@ -72,7 +73,7 @@ use std::fmt;
7273
#[cfg(feature = "use_alloc")]
7374
use std::fmt::Write;
7475
#[cfg(feature = "use_std")]
75-
use std::hash::Hash;
76+
use std::hash::{Hash, RandomState};
7677
use std::iter::{once, IntoIterator};
7778
#[cfg(feature = "use_alloc")]
7879
type VecDequeIntoIter<T> = alloc::collections::vec_deque::IntoIter<T>;
@@ -1406,7 +1407,33 @@ pub trait Itertools: Iterator {
14061407
Self: Sized,
14071408
Self::Item: Eq + Hash,
14081409
{
1409-
duplicates_impl::duplicates(self)
1410+
duplicates_impl::duplicates_with_hasher(self, RandomState::new())
1411+
}
1412+
1413+
/// Return an iterator which yields the same elements as the one returned by
1414+
/// [`.duplicates()`](crate::Itertools::duplicates), but uses the specified hash builder to hash
1415+
/// the elements for comparison.
1416+
///
1417+
/// Warning: `hash_builder` is normally randomly generated, and is designed to allow its
1418+
/// users to be resistant to attacks that cause many collisions and very poor performance.
1419+
/// Setting it manually using this function can expose a DoS attack vector.
1420+
///
1421+
/// ```
1422+
/// use ahash::RandomState;
1423+
/// use itertools::Itertools;
1424+
///
1425+
/// let data = vec![10, 20, 30, 20, 40, 10, 50];
1426+
/// itertools::assert_equal(data.into_iter().duplicates_with_hasher(RandomState::new()),
1427+
/// vec![20,10]);
1428+
/// ```
1429+
#[cfg(feature = "use_std")]
1430+
fn duplicates_with_hasher<S>(self, hash_builder: S) -> Duplicates<Self, S>
1431+
where
1432+
Self: Sized,
1433+
Self::Item: Eq + Hash,
1434+
S: BuildHasher,
1435+
{
1436+
duplicates_impl::duplicates_with_hasher(self, hash_builder)
14101437
}
14111438

14121439
/// Return an iterator adaptor that produces elements that appear more than once during the
@@ -1433,7 +1460,38 @@ pub trait Itertools: Iterator {
14331460
V: Eq + Hash,
14341461
F: FnMut(&Self::Item) -> V,
14351462
{
1436-
duplicates_impl::duplicates_by(self, f)
1463+
duplicates_impl::duplicates_by_with_hasher(self, f, RandomState::new())
1464+
}
1465+
1466+
/// Return an iterator which yields the same elements as the one returned by
1467+
/// [`.duplicates_by()`](crate::Itertools::duplicates_by), but uses the specified hash builder to hash
1468+
/// the keys for comparison.
1469+
///
1470+
/// Warning: `hash_builder` is normally randomly generated, and is designed to allow its
1471+
/// users to be resistant to attacks that cause many collisions and very poor performance.
1472+
/// Setting it manually using this function can expose a DoS attack vector.
1473+
///
1474+
/// ```
1475+
/// use ahash::RandomState;
1476+
/// use itertools::Itertools;
1477+
///
1478+
/// let data = vec!["a", "bb", "aa", "c", "ccc"];
1479+
/// itertools::assert_equal(data.into_iter().duplicates_by_with_hasher(|s| s.len(),RandomState::new()),
1480+
/// vec!["aa", "c"]);
1481+
/// ```
1482+
#[cfg(feature = "use_std")]
1483+
fn duplicates_by_with_hasher<V, F, S>(
1484+
self,
1485+
f: F,
1486+
hash_builder: S,
1487+
) -> DuplicatesBy<Self, V, F, S>
1488+
where
1489+
Self: Sized,
1490+
V: Eq + Hash,
1491+
F: FnMut(&Self::Item) -> V,
1492+
S: BuildHasher,
1493+
{
1494+
duplicates_impl::duplicates_by_with_hasher(self, f, hash_builder)
14371495
}
14381496

14391497
/// Return an iterator adaptor that filters out elements that have

src/unique_impl.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::collections::hash_map::Entry;
2-
use std::collections::HashMap;
2+
use std::collections::{HashMap, HashSet};
33
use std::fmt;
44
use std::hash::Hash;
55
use std::iter::FusedIterator;

0 commit comments

Comments
 (0)