Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,7 @@ Currently supported randomization
on the order of the remaining part
* ``std::nth_element``, there is no guarantee on the order from both sides of the
partition
* ``std::unordered_{set,map}``, there is no guarantee on the order of the elements
* ``std::unordered_{multiset,multimap}``, there is no guarantee on the order of the elements nor the order of equal elements

Patches welcome.
108 changes: 94 additions & 14 deletions libcxx/include/__hash_table
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
#ifndef _LIBCPP___HASH_TABLE
#define _LIBCPP___HASH_TABLE

#include <__algorithm/iterator_operations.h>
#include <__algorithm/max.h>
#include <__algorithm/min.h>
#include <__assert>
#include <__bit/countl.h>
#include <__config>
#include <__debug_utils/randomize_range.h>
#include <__functional/hash.h>
#include <__functional/invoke.h>
#include <__iterator/iterator_traits.h>
Expand Down Expand Up @@ -775,6 +777,7 @@ public:
}

private:
_LIBCPP_HIDE_FROM_ABI __next_pointer __node_insert_multi_find_insertion_point(size_t __cp_hash, value_type& __cp_val);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__node_insert_multi_find_insertion_point is used in __node_insert_multi_prepare and __debug_randomize_order. Is refactoring warranted or should I instead copy the "find insertion point" logic/code from __node_insert_multi_prepare?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that you extracted this into a separate function.

_LIBCPP_HIDE_FROM_ABI __next_pointer __node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val);
_LIBCPP_HIDE_FROM_ABI void __node_insert_multi_perform(__node_pointer __cp, __next_pointer __pn) _NOEXCEPT;

Expand Down Expand Up @@ -980,6 +983,9 @@ private:
template <bool _UniqueKeys>
_LIBCPP_HIDE_FROM_ABI void __do_rehash(size_type __n);

template <bool _UniqueKeys>
_LIBCPP_HIDE_FROM_ABI void __debug_randomize_order();

template <class... _Args>
_LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... __args);

Expand Down Expand Up @@ -1395,22 +1401,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique(__node_pointer __
return pair<iterator, bool>(iterator(__existing_node), __inserted);
}

// Prepare the container for an insertion of the value __cp_val with the hash
// __cp_hash. This does a lookup into the container to see if __cp_value is
// already present, and performs a rehash if necessary. Returns a pointer to the
// last occurrence of __cp_val in the map.
//
// Note that this function does forward exceptions if key_eq() throws, and never
// mutates __value or actually inserts into the map.
// Does lookup into the container to see if __cp_val is already present, and
// returns a pointer to the last occurrence of __cp_val in the map.
template <class _Tp, class _Hash, class _Equal, class _Alloc>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val) {
size_type __bc = bucket_count();
if (size() + 1 > __bc * max_load_factor() || __bc == 0) {
__rehash_multi(std::max<size_type>(
2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor()))));
__bc = bucket_count();
}
__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_find_insertion_point(
size_t __cp_hash, value_type& __cp_val) {
size_type __bc = bucket_count();
size_t __chash = std::__constrain_hash(__cp_hash, __bc);
__next_pointer __pn = __bucket_list_[__chash];
if (__pn != nullptr) {
Expand All @@ -1434,6 +1431,24 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __c
return __pn;
}

// Prepare the container for an insertion of the value __cp_val with the hash
// __cp_hash. This does a lookup into the container to see if __cp_value is
// already present, and performs a rehash if necessary. Returns a pointer to the
// last occurrence of __cp_val in the map.
//
// Note that this function does forward exceptions if key_eq() throws, and never
// mutates __value or actually inserts into the map.
template <class _Tp, class _Hash, class _Equal, class _Alloc>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val) {
size_type __bc = bucket_count();
if (size() + 1 > __bc * max_load_factor() || __bc == 0) {
__rehash_multi(std::max<size_type>(
2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor()))));
}
return __node_insert_multi_find_insertion_point(__cp_hash, __cp_val);
}

// Insert the node __cp into the container after __pn (which is the last node in
// the bucket that compares equal to __cp). Rehashing, and checking for
// uniqueness has already been performed (in __node_insert_multi_prepare), so
Expand Down Expand Up @@ -1702,6 +1717,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D
template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <bool _UniqueKeys>
void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
__debug_randomize_order<_UniqueKeys>();
__pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
__bucket_list_.reset(__nbc > 0 ? __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr);
__bucket_list_.get_deleter().size() = __nbc;
Expand Down Expand Up @@ -1741,6 +1757,70 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
}
}

template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <bool _UniqueKeys>
void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__debug_randomize_order() {
#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY

struct __nh_vec {
using __nh_allocator = __rebind_alloc<__node_traits, __node_holder>;
using __pointer = typename allocator_traits<__nh_allocator>::pointer;
__nh_allocator __nh_alloc;
__pointer __p = __pointer();
size_type __total_nodes = 0;
size_type __initialized_nodes = 0;
__nh_vec(size_type __n) : __nh_alloc(), __p(__nh_alloc.allocate(__n)), __total_nodes(__n) {}
~__nh_vec() {
if (__p) {
std::__destroy(__p, __p + __initialized_nodes);
__nh_alloc.deallocate(__p, __total_nodes);
}
}
};

struct __index_vec {
using __index_allocator = __rebind_alloc<__node_traits, size_type>;
using __pointer = typename allocator_traits<__index_allocator>::pointer;
__index_allocator __index_alloc;
__pointer __p = __pointer();
size_type __total_indices = 0;
size_type __initialized_indices = 0;
__index_vec(size_type __n) : __index_alloc(), __p(__index_alloc.allocate(__n)), __total_indices(__n) {}
~__index_vec() {
if (__p) {
std::__destroy(__p, __p + __initialized_indices);
__index_alloc.deallocate(__p, __total_indices);
}
}
};

__nh_vec __nhv(size());
__index_vec __iv(size());
Copy link
Author

@arvidjonasson arvidjonasson Aug 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd ideally want to use unique_ptr<__node_holder[], decltype(deleter)> and unique_ptr<size_type[], decltype(deleter)> (instead of __nh_vec and __index_vec) while still being able to use the user defined allocator. But min_pointer<T> is giving me problems (by not being convertible to T*). Is someone familiar with the matter?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you paste what the exact error is? It's probably something benign like a missing std::__to_address call.


// Move nodes into temporary storage.
for (; __nhv.__initialized_nodes < __nhv.__total_nodes; ++__nhv.__initialized_nodes)
std::__construct_at(std::addressof(__nhv.__p[__nhv.__initialized_nodes]), remove(begin()));

// Randomize the order of indices.
for (; __iv.__initialized_indices < __nhv.__initialized_nodes; ++__iv.__initialized_indices)
std::__construct_at(std::addressof(__iv.__p[__iv.__initialized_indices]), __iv.__initialized_indices);
__debug_randomize_range<_ClassicAlgPolicy>(__iv.__p, __iv.__p + __iv.__initialized_indices);

// Reinsert nodes into the hash table in randomized order.
for (size_type __i = 0; __i < __iv.__initialized_indices; ++__i) {
__node_holder& __nh = __nhv.__p[__iv.__p[__i]];
__node_pointer __np = __nh->__upcast();
if _LIBCPP_CONSTEXPR_SINCE_CXX17 (_UniqueKeys) {
__node_insert_unique_perform(__np);
} else {
__next_pointer __pn = __node_insert_multi_find_insertion_point(__np->__hash(), __np->__get_value());
__node_insert_multi_perform(__np, __pn);
}
__nh.release();
}
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#endif
#endif // defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY)

}

template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <class _Key>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Test std::unordered_{set,map,multiset,multimap} randomization

// UNSUPPORTED: c++03
// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY

#include <unordered_set>
#include <unordered_map>
#include <cassert>
#include <vector>
#include <algorithm>

const int kSize = 128;

template <typename T, typename F>
T get_random(F get_value) {
Comment on lines +22 to +23
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
template <typename T, typename F>
T get_random(F get_value) {
template <typename Container, typename F>
Container get_random(F get_value) {

T v;
v.reserve(kSize);
Comment on lines +24 to +25
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
T v;
v.reserve(kSize);
Container c;
c.reserve(kSize);

Or something like that -- minor change but it makes things easier to read.

for (int i = 0; i < kSize; ++i) {
v.insert(get_value());
}
v.rehash(v.bucket_count() + 1);
return v;
}

template <typename T, typename F>
T get_deterministic(F get_value) {
T v;
v.reserve(kSize);
for (int i = 0; i < kSize; ++i) {
v.insert(get_value());
}
return v;
}

template <typename T>
struct RemoveConst {
using type = T;
};

template <typename T, typename U>
struct RemoveConst<std::pair<const T, U>> {
using type = std::pair<T, U>;
};

template <typename T, typename F>
void test_randomization(F get_value) {
T t1 = get_deterministic<T>(get_value), t2 = get_random<T>(get_value);

// Convert pair<const K, V> to pair<K, V> so it can be sorted
using U = typename RemoveConst<typename T::value_type>::type;

std::vector<U> t1v(t1.begin(), t1.end()), t2v(t2.begin(), t2.end());

assert(t1v != t2v);

std::sort(t1v.begin(), t1v.end());
std::sort(t2v.begin(), t2v.end());

assert(t1v == t2v);
}

int main(int, char**) {
int i = 0, j = 0;
test_randomization<std::unordered_set<int>>([i]() mutable { return i++; });
test_randomization<std::unordered_map<int, int>>([i, j]() mutable { return std::make_pair(i++, j++); });
test_randomization<std::unordered_multiset<int>>([i]() mutable { return i++ % 32; });
test_randomization<std::unordered_multimap<int, int>>([i, j]() mutable { return std::make_pair(i++ % 32, j++); });
return 0;
}