Skip to content

Commit a1ca58d

Browse files
committed
Add unique_[by_]with_hasher()
1 parent 4f0dcd8 commit a1ca58d

File tree

2 files changed

+82
-19
lines changed

2 files changed

+82
-19
lines changed

src/lib.rs

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1419,7 +1419,7 @@ pub trait Itertools: Iterator {
14191419
/// Setting it manually using this function can expose a DoS attack vector.
14201420
///
14211421
/// ```
1422-
/// use ahash::RandomState;
1422+
/// use std::hash::RandomState;
14231423
/// use itertools::Itertools;
14241424
///
14251425
/// let data = vec![10, 20, 30, 20, 40, 10, 50];
@@ -1464,15 +1464,15 @@ pub trait Itertools: Iterator {
14641464
}
14651465

14661466
/// Return an iterator which yields the same elements as the one returned by
1467-
/// [.duplicates()](crate::Itertools::duplicates), but uses the specified hash builder to hash
1468-
/// the keys for comparison.
1467+
/// [.duplicates_by()](crate::Itertools::duplicates_by), but uses the specified hash builder to
1468+
/// hash the keys for comparison.
14691469
///
14701470
/// Warning: `hash_builder` is normally randomly generated, and is designed to allow its
14711471
/// users to be resistant to attacks that cause many collisions and very poor performance.
14721472
/// Setting it manually using this function can expose a DoS attack vector.
14731473
///
14741474
/// ```
1475-
/// use ahash::RandomState;
1475+
/// use std::hash::RandomState;
14761476
/// use itertools::Itertools;
14771477
///
14781478
/// let data = vec!["a", "bb", "aa", "c", "ccc"];
@@ -1518,7 +1518,33 @@ pub trait Itertools: Iterator {
15181518
Self: Sized,
15191519
Self::Item: Clone + Eq + Hash,
15201520
{
1521-
unique_impl::unique(self)
1521+
unique_impl::unique_with_hasher(self, RandomState::new())
1522+
}
1523+
1524+
/// Return an iterator which yields the same elements as the one returned by
1525+
/// [.unique()](crate::Itertools::unique), but uses the specified hash builder to hash the
1526+
/// elements for comparison.
1527+
///
1528+
/// Warning: `hash_builder` is normally randomly generated, and is designed to allow its
1529+
/// users to be resistant to attacks that cause many collisions and very poor performance.
1530+
/// Setting it manually using this function can expose a DoS attack vector.
1531+
///
1532+
/// ```
1533+
/// use std::hash::RandomState;
1534+
/// use itertools::Itertools;
1535+
///
1536+
/// let data = vec![10, 20, 30, 20, 40, 10, 50];
1537+
/// itertools::assert_equal(data.into_iter().unique_with_hasher(RandomState::new()),
1538+
/// vec![10, 20, 30, 40, 50]);
1539+
/// ```
1540+
#[cfg(feature = "use_std")]
1541+
fn unique_with_hasher<S>(self, hash_builder: S) -> Unique<Self, S>
1542+
where
1543+
Self: Sized,
1544+
Self::Item: Clone + Eq + Hash,
1545+
S: BuildHasher,
1546+
{
1547+
unique_impl::unique_with_hasher(self, hash_builder)
15221548
}
15231549

15241550
/// Return an iterator adaptor that filters out elements that have
@@ -1546,7 +1572,34 @@ pub trait Itertools: Iterator {
15461572
V: Eq + Hash,
15471573
F: FnMut(&Self::Item) -> V,
15481574
{
1549-
unique_impl::unique_by(self, f)
1575+
unique_impl::unique_by_with_hasher(self, f, RandomState::new())
1576+
}
1577+
1578+
/// Return an iterator which yields the same elements as the one returned by
1579+
/// [.unique_by()](crate::Itertools::unique_by), but uses the specified hash builder to hash
1580+
/// the keys for comparison.
1581+
///
1582+
/// Warning: `hash_builder` is normally randomly generated, and is designed to allow its
1583+
/// users to be resistant to attacks that cause many collisions and very poor performance.
1584+
/// Setting it manually using this function can expose a DoS attack vector.
1585+
///
1586+
/// ```
1587+
/// use std::hash::RandomState;
1588+
/// use itertools::Itertools;
1589+
///
1590+
/// let data = vec!["a", "bb", "aa", "c", "ccc"];
1591+
/// itertools::assert_equal(data.into_iter().unique_by_with_hasher(|s| s.len(), RandomState::new()),
1592+
/// vec!["a", "bb", "ccc"]);
1593+
/// ```
1594+
#[cfg(feature = "use_std")]
1595+
fn unique_by_with_hasher<V, F, S>(self, f: F, hash_builder: S) -> UniqueBy<Self, V, F, S>
1596+
where
1597+
Self: Sized,
1598+
V: Eq + Hash,
1599+
F: FnMut(&Self::Item) -> V,
1600+
S: BuildHasher,
1601+
{
1602+
unique_impl::unique_by_with_hasher(self, f, hash_builder)
15501603
}
15511604

15521605
/// Return an iterator adaptor that borrows from this iterator and

src/unique_impl.rs

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,70 @@
1+
use core::hash::BuildHasher;
12
use std::collections::hash_map::Entry;
2-
use std::collections::{HashMap, HashSet};
3+
use std::collections::HashMap;
34
use std::fmt;
4-
use std::hash::Hash;
5+
use std::hash::{Hash, RandomState};
56
use std::iter::FusedIterator;
67

78
/// An iterator adapter to filter out duplicate elements.
89
///
910
/// See [`.unique_by()`](crate::Itertools::unique_by) for more information.
1011
#[derive(Clone)]
1112
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
12-
pub struct UniqueBy<I: Iterator, V, F> {
13+
pub struct UniqueBy<I: Iterator, V, F, S = RandomState>
14+
where
15+
S: BuildHasher,
16+
{
1317
iter: I,
1418
// Use a Hashmap for the Entry API in order to prevent hashing twice.
1519
// This can maybe be replaced with a HashSet once `get_or_insert_with`
1620
// or a proper Entry API for Hashset is stable and meets this msrv
17-
used: HashMap<V, ()>,
21+
used: HashMap<V, (), S>,
1822
f: F,
1923
}
2024

21-
impl<I, V, F> fmt::Debug for UniqueBy<I, V, F>
25+
impl<I, V, F, S> fmt::Debug for UniqueBy<I, V, F, S>
2226
where
2327
I: Iterator + fmt::Debug,
2428
V: fmt::Debug + Hash + Eq,
29+
S: BuildHasher,
2530
{
2631
debug_fmt_fields!(UniqueBy, iter, used);
2732
}
2833

2934
/// Create a new `UniqueBy` iterator.
30-
pub fn unique_by<I, V, F>(iter: I, f: F) -> UniqueBy<I, V, F>
35+
pub fn unique_by_with_hasher<I, V, F, S>(iter: I, f: F, hash_builder: S) -> UniqueBy<I, V, F, S>
3136
where
3237
V: Eq + Hash,
3338
F: FnMut(&I::Item) -> V,
3439
I: Iterator,
40+
S: BuildHasher,
3541
{
3642
UniqueBy {
3743
iter,
38-
used: HashMap::new(),
44+
used: HashMap::with_hasher(hash_builder),
3945
f,
4046
}
4147
}
4248

4349
// count the number of new unique keys in iterable (`used` is the set already seen)
44-
fn count_new_keys<I, K>(mut used: HashMap<K, ()>, iterable: I) -> usize
50+
fn count_new_keys<I, K, S>(mut used: HashMap<K, (), S>, iterable: I) -> usize
4551
where
4652
I: IntoIterator<Item = K>,
4753
K: Hash + Eq,
54+
S: BuildHasher,
4855
{
4956
let iter = iterable.into_iter();
5057
let current_used = used.len();
5158
used.extend(iter.map(|key| (key, ())));
5259
used.len() - current_used
5360
}
5461

55-
impl<I, V, F> Iterator for UniqueBy<I, V, F>
62+
impl<I, V, F, S> Iterator for UniqueBy<I, V, F, S>
5663
where
5764
I: Iterator,
5865
V: Eq + Hash,
5966
F: FnMut(&I::Item) -> V,
67+
S: BuildHasher,
6068
{
6169
type Item = I::Item;
6270

@@ -157,12 +165,13 @@ where
157165
/// See [`.unique()`](crate::Itertools::unique) for more information.
158166
#[derive(Clone)]
159167
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
160-
pub struct Unique<I>
168+
pub struct Unique<I, S = RandomState>
161169
where
162170
I: Iterator,
163171
I::Item: Eq + Hash + Clone,
172+
S: BuildHasher,
164173
{
165-
iter: UniqueBy<I, I::Item, ()>,
174+
iter: UniqueBy<I, I::Item, (), S>,
166175
}
167176

168177
impl<I> fmt::Debug for Unique<I>
@@ -173,15 +182,16 @@ where
173182
debug_fmt_fields!(Unique, iter);
174183
}
175184

176-
pub fn unique<I>(iter: I) -> Unique<I>
185+
pub fn unique_with_hasher<I, S>(iter: I, hash_builder: S) -> Unique<I, S>
177186
where
178187
I: Iterator,
179188
I::Item: Eq + Hash + Clone,
189+
S: BuildHasher,
180190
{
181191
Unique {
182192
iter: UniqueBy {
183193
iter,
184-
used: HashMap::new(),
194+
used: HashMap::with_hasher(hash_builder),
185195
f: (),
186196
},
187197
}

0 commit comments

Comments
 (0)