Skip to content

Commit 99836ec

Browse files
philnik777Lukacma
authored andcommitted
Reapply "[libc++] Optimize __hash_table::erase(iterator, iterator)" (llvm#162850)
This reapplication fixes the use after free caused by not properly updating the bucket list in one case. Original commit message: Instead of just calling the single element `erase` on every element of the range, we can combine some of the operations in a custom implementation. Specifically, we don't need to search for the previous node or re-link the list every iteration. Removing this unnecessary work results in some nice performance improvements: ``` ----------------------------------------------------------------------------------------------------------------------- Benchmark old new ----------------------------------------------------------------------------------------------------------------------- std::unordered_set<int>::erase(iterator, iterator) (erase half the container)/0 457 ns 459 ns std::unordered_set<int>::erase(iterator, iterator) (erase half the container)/32 995 ns 626 ns std::unordered_set<int>::erase(iterator, iterator) (erase half the container)/1024 18196 ns 7995 ns std::unordered_set<int>::erase(iterator, iterator) (erase half the container)/8192 124722 ns 70125 ns std::unordered_set<std::string>::erase(iterator, iterator) (erase half the container)/0 456 ns 461 ns std::unordered_set<std::string>::erase(iterator, iterator) (erase half the container)/32 1183 ns 769 ns std::unordered_set<std::string>::erase(iterator, iterator) (erase half the container)/1024 27827 ns 18614 ns std::unordered_set<std::string>::erase(iterator, iterator) (erase half the container)/8192 266681 ns 226107 ns std::unordered_map<int, int>::erase(iterator, iterator) (erase half the container)/0 455 ns 462 ns std::unordered_map<int, int>::erase(iterator, iterator) (erase half the container)/32 996 ns 659 ns std::unordered_map<int, int>::erase(iterator, iterator) (erase half the container)/1024 15963 ns 8108 ns std::unordered_map<int, int>::erase(iterator, iterator) (erase half the container)/8192 136493 ns 71848 ns std::unordered_multiset<int>::erase(iterator, iterator) (erase half the container)/0 454 ns 455 ns std::unordered_multiset<int>::erase(iterator, iterator) (erase half the container)/32 985 ns 703 ns std::unordered_multiset<int>::erase(iterator, iterator) (erase half the container)/1024 16277 ns 9085 ns std::unordered_multiset<int>::erase(iterator, iterator) (erase half the container)/8192 125736 ns 82710 ns std::unordered_multimap<int, int>::erase(iterator, iterator) (erase half the container)/0 457 ns 454 ns std::unordered_multimap<int, int>::erase(iterator, iterator) (erase half the container)/32 1091 ns 646 ns std::unordered_multimap<int, int>::erase(iterator, iterator) (erase half the container)/1024 17784 ns 7664 ns std::unordered_multimap<int, int>::erase(iterator, iterator) (erase half the container)/8192 127098 ns 72806 ns ``` This reverts commit acc3a62.
1 parent 5c18a61 commit 99836ec

File tree

4 files changed

+124
-25
lines changed

4 files changed

+124
-25
lines changed

libcxx/docs/ReleaseNotes/22.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ Improvements and New Features
6262
has been improved by up to 3x
6363
- The performance of ``insert(iterator, iterator)`` of ``map``, ``set``, ``multimap`` and ``multiset`` has been improved
6464
by up to 2.5x
65+
- The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x
6566
- The performance of ``map::insert_or_assign`` has been improved by up to 2x
6667
- ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying them
6768
in chunks into a buffer.

libcxx/include/__hash_table

Lines changed: 75 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,7 +1037,21 @@ private:
10371037
}
10381038
_LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__hash_table&, false_type) _NOEXCEPT {}
10391039

1040-
_LIBCPP_HIDE_FROM_ABI void __deallocate_node(__next_pointer __np) _NOEXCEPT;
1040+
_LIBCPP_HIDE_FROM_ABI void __deallocate_node(__node_pointer __nd) _NOEXCEPT {
1041+
auto& __alloc = __node_alloc();
1042+
__node_traits::destroy(__alloc, std::addressof(__nd->__get_value()));
1043+
std::__destroy_at(std::__to_address(__nd));
1044+
__node_traits::deallocate(__alloc, __nd, 1);
1045+
}
1046+
1047+
_LIBCPP_HIDE_FROM_ABI void __deallocate_node_list(__next_pointer __np) _NOEXCEPT {
1048+
while (__np != nullptr) {
1049+
__next_pointer __next = __np->__next_;
1050+
__deallocate_node(__np->__upcast());
1051+
__np = __next;
1052+
}
1053+
}
1054+
10411055
_LIBCPP_HIDE_FROM_ABI __next_pointer __detach() _NOEXCEPT;
10421056

10431057
template <class _From, class _ValueT = _Tp, __enable_if_t<__is_hash_value_type<_ValueT>::value, int> = 0>
@@ -1175,7 +1189,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() {
11751189
static_assert(is_copy_constructible<hasher>::value, "Hasher must be copy-constructible.");
11761190
#endif
11771191

1178-
__deallocate_node(__first_node_.__next_);
1192+
__deallocate_node_list(__first_node_.__next_);
11791193
}
11801194

11811195
template <class _Tp, class _Hash, class _Equal, class _Alloc>
@@ -1251,7 +1265,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other)
12511265
// At this point we either have consumed the whole incoming hash table, or we don't have any more nodes to reuse in
12521266
// the destination. Either continue with constructing new nodes, or deallocate the left over nodes.
12531267
if (__own_iter->__next_) {
1254-
__deallocate_node(__own_iter->__next_);
1268+
__deallocate_node_list(__own_iter->__next_);
12551269
__own_iter->__next_ = nullptr;
12561270
} else {
12571271
__copy_construct(__other_iter, __own_iter, __current_chash);
@@ -1262,19 +1276,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other)
12621276
return *this;
12631277
}
12641278

1265-
template <class _Tp, class _Hash, class _Equal, class _Alloc>
1266-
void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) _NOEXCEPT {
1267-
__node_allocator& __na = __node_alloc();
1268-
while (__np != nullptr) {
1269-
__next_pointer __next = __np->__next_;
1270-
__node_pointer __real_np = __np->__upcast();
1271-
__node_traits::destroy(__na, std::addressof(__real_np->__get_value()));
1272-
std::__destroy_at(std::addressof(*__real_np));
1273-
__node_traits::deallocate(__na, __real_np, 1);
1274-
__np = __next;
1275-
}
1276-
}
1277-
12781279
template <class _Tp, class _Hash, class _Equal, class _Alloc>
12791280
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
12801281
__hash_table<_Tp, _Hash, _Equal, _Alloc>::__detach() _NOEXCEPT {
@@ -1318,7 +1319,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u,
13181319
max_load_factor() = __u.max_load_factor();
13191320
if (bucket_count() != 0) {
13201321
__next_pointer __cache = __detach();
1321-
auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
1322+
auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
13221323
const_iterator __i = __u.begin();
13231324
while (__cache != nullptr && __u.size() != 0) {
13241325
__assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value()));
@@ -1353,7 +1354,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __
13531354

13541355
if (bucket_count() != 0) {
13551356
__next_pointer __cache = __detach();
1356-
auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
1357+
auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
13571358
for (; __cache != nullptr && __first != __last; ++__first) {
13581359
__assign_value(__cache->__upcast()->__get_value(), *__first);
13591360
__next_pointer __next = __cache->__next_;
@@ -1374,7 +1375,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __f
13741375
"__assign_multi may only be called with the containers value type or the nodes value type");
13751376
if (bucket_count() != 0) {
13761377
__next_pointer __cache = __detach();
1377-
auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
1378+
auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
13781379
for (; __cache != nullptr && __first != __last; ++__first) {
13791380
__assign_value(__cache->__upcast()->__get_value(), *__first);
13801381
__next_pointer __next = __cache->__next_;
@@ -1413,7 +1414,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT {
14131414
template <class _Tp, class _Hash, class _Equal, class _Alloc>
14141415
void __hash_table<_Tp, _Hash, _Equal, _Alloc>::clear() _NOEXCEPT {
14151416
if (size() > 0) {
1416-
__deallocate_node(__first_node_.__next_);
1417+
__deallocate_node_list(__first_node_.__next_);
14171418
__first_node_.__next_ = nullptr;
14181419
size_type __bc = bucket_count();
14191420
for (size_type __i = 0; __i < __bc; ++__i)
@@ -1873,12 +1874,61 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) {
18731874
template <class _Tp, class _Hash, class _Equal, class _Alloc>
18741875
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
18751876
__hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) {
1876-
for (const_iterator __p = __first; __first != __last; __p = __first) {
1877-
++__first;
1878-
erase(__p);
1877+
if (__first == __last)
1878+
return iterator(__last.__node_);
1879+
1880+
// current node
1881+
__next_pointer __current = __first.__node_;
1882+
size_type __bucket_count = bucket_count();
1883+
size_t __chash = std::__constrain_hash(__current->__hash(), __bucket_count);
1884+
// find previous node
1885+
__next_pointer __before_first = __bucket_list_[__chash];
1886+
for (; __before_first->__next_ != __current; __before_first = __before_first->__next_)
1887+
;
1888+
1889+
__next_pointer __last_node = __last.__node_;
1890+
1891+
// If __before_first is in the same bucket (i.e. the first element we erase is not the first in the bucket), clear
1892+
// this bucket first without re-linking it
1893+
if (__before_first != __first_node_.__ptr() &&
1894+
std::__constrain_hash(__before_first->__hash(), __bucket_count) == __chash) {
1895+
while (__current != __last_node) {
1896+
auto __next = __current->__next_;
1897+
__deallocate_node(__current->__upcast());
1898+
__current = __next;
1899+
--__size_;
1900+
1901+
if (__next) {
1902+
if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) {
1903+
__bucket_list_[__next_chash] = __before_first;
1904+
__chash = __next_chash;
1905+
break;
1906+
}
1907+
}
1908+
}
1909+
}
1910+
1911+
while (__current != __last_node) {
1912+
auto __next = __current->__next_;
1913+
__deallocate_node(__current->__upcast());
1914+
__current = __next;
1915+
--__size_;
1916+
1917+
// When switching buckets, set the old bucket to be empty and update the next bucket to have __before_first as its
1918+
// before-first element
1919+
if (__next) {
1920+
if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) {
1921+
__bucket_list_[__chash] = nullptr;
1922+
__bucket_list_[__next_chash] = __before_first;
1923+
__chash = __next_chash;
1924+
}
1925+
}
18791926
}
1880-
__next_pointer __np = __last.__node_;
1881-
return iterator(__np);
1927+
1928+
// re-link __before_first with __last
1929+
__before_first->__next_ = __current;
1930+
1931+
return iterator(__last.__node_);
18821932
}
18831933

18841934
template <class _Tp, class _Hash, class _Equal, class _Alloc>

libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,37 @@ int main(int, char**) {
9191
assert(c.size() == 0);
9292
assert(k == c.end());
9393
}
94+
{ // Make sure we're properly destroying the elements when erasing
95+
{ // When erasing part of a bucket
96+
std::unordered_multimap<int, std::string> map;
97+
map.insert(std::make_pair(1, "This is a long string to make sure ASan can detect a memory leak"));
98+
map.insert(std::make_pair(1, "This is another long string to make sure ASan can detect a memory leak"));
99+
map.erase(++map.begin(), map.end());
100+
}
101+
{ // When erasing the whole bucket
102+
std::unordered_multimap<int, std::string> map;
103+
map.insert(std::make_pair(1, "This is a long string to make sure ASan can detect a memory leak"));
104+
map.insert(std::make_pair(1, "This is another long string to make sure ASan can detect a memory leak"));
105+
map.erase(map.begin(), map.end());
106+
}
107+
}
108+
{ // Make sure that we're properly updating the bucket list when starting within a bucket
109+
struct MyHash {
110+
size_t operator()(size_t v) const { return v; }
111+
};
112+
std::unordered_multimap<size_t, size_t, MyHash> map;
113+
size_t collision_val = 2 + map.bucket_count(); // try to get a bucket collision
114+
map.rehash(3);
115+
map.insert(std::pair<size_t, size_t>(1, 1));
116+
map.insert(std::pair<size_t, size_t>(collision_val, 1));
117+
map.insert(std::pair<size_t, size_t>(2, 1));
118+
LIBCPP_ASSERT(map.bucket(2) == map.bucket(collision_val));
119+
120+
auto erase = map.equal_range(2);
121+
map.erase(erase.first, erase.second);
122+
for (const auto& v : map)
123+
assert(v.first == 1 || v.first == collision_val);
124+
}
94125
#if TEST_STD_VER >= 11
95126
{
96127
typedef std::unordered_multimap<int,

libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,23 @@ int main(int, char**) {
4747
assert(c.size() == 0);
4848
assert(k == c.end());
4949
}
50+
{ // Make sure that we're properly updating the bucket list when starting within a bucket
51+
struct MyHash {
52+
size_t operator()(size_t v) const { return v; }
53+
};
54+
std::unordered_multiset<size_t, MyHash> map;
55+
size_t collision_val = 2 + map.bucket_count(); // try to get a bucket collision
56+
map.rehash(3);
57+
map.insert(1);
58+
map.insert(collision_val);
59+
map.insert(2);
60+
LIBCPP_ASSERT(map.bucket(2) == map.bucket(collision_val));
61+
62+
auto erase = map.equal_range(2);
63+
map.erase(erase.first, erase.second);
64+
for (const auto& v : map)
65+
assert(v == 1 || v == collision_val);
66+
}
5067
#if TEST_STD_VER >= 11
5168
{
5269
typedef std::unordered_multiset<int, std::hash<int>, std::equal_to<int>, min_allocator<int>> C;

0 commit comments

Comments
 (0)