-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Reapply "[libc++] Optimize __hash_table::erase(iterator, iterator)" #162850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Reapply "[libc++] Optimize __hash_table::erase(iterator, iterator)" #162850
Conversation
… (llvm#158769) This reverts commit acc3a62.
@llvm/pr-subscribers-libcxx Author: Nikolas Klauser (philnik777) ChangesThis reverts commit acc3a62. Full diff: https://github.com/llvm/llvm-project/pull/162850.diff 3 Files Affected:
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index ec23ba9d1e3a1..942d899c8919c 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -60,6 +60,7 @@ Improvements and New Features
has been improved by up to 3x
- The performance of ``insert(iterator, iterator)`` of ``map``, ``set``, ``multimap`` and ``multiset`` has been improved
by up to 2.5x
+- The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x
- The performance of ``map::insert_or_assign`` has been improved by up to 2x
- ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying them
in chunks into a buffer.
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index 74923ddb74e9c..2b4cfc2f691a1 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -1037,7 +1037,21 @@ private:
}
_LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__hash_table&, false_type) _NOEXCEPT {}
- _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__next_pointer __np) _NOEXCEPT;
+ _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__node_pointer __nd) _NOEXCEPT {
+ auto& __alloc = __node_alloc();
+ __node_traits::destroy(__alloc, std::addressof(__nd->__get_value()));
+ std::__destroy_at(std::__to_address(__nd));
+ __node_traits::deallocate(__alloc, __nd, 1);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI void __deallocate_node_list(__next_pointer __np) _NOEXCEPT {
+ while (__np != nullptr) {
+ __next_pointer __next = __np->__next_;
+ __deallocate_node(__np->__upcast());
+ __np = __next;
+ }
+ }
+
_LIBCPP_HIDE_FROM_ABI __next_pointer __detach() _NOEXCEPT;
template <class _From, class _ValueT = _Tp, __enable_if_t<__is_hash_value_type<_ValueT>::value, int> = 0>
@@ -1175,7 +1189,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() {
static_assert(is_copy_constructible<hasher>::value, "Hasher must be copy-constructible.");
#endif
- __deallocate_node(__first_node_.__next_);
+ __deallocate_node_list(__first_node_.__next_);
}
template <class _Tp, class _Hash, class _Equal, class _Alloc>
@@ -1251,7 +1265,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other)
// At this point we either have consumed the whole incoming hash table, or we don't have any more nodes to reuse in
// the destination. Either continue with constructing new nodes, or deallocate the left over nodes.
if (__own_iter->__next_) {
- __deallocate_node(__own_iter->__next_);
+ __deallocate_node_list(__own_iter->__next_);
__own_iter->__next_ = nullptr;
} else {
__copy_construct(__other_iter, __own_iter, __current_chash);
@@ -1262,19 +1276,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other)
return *this;
}
-template <class _Tp, class _Hash, class _Equal, class _Alloc>
-void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) _NOEXCEPT {
- __node_allocator& __na = __node_alloc();
- while (__np != nullptr) {
- __next_pointer __next = __np->__next_;
- __node_pointer __real_np = __np->__upcast();
- __node_traits::destroy(__na, std::addressof(__real_np->__get_value()));
- std::__destroy_at(std::addressof(*__real_np));
- __node_traits::deallocate(__na, __real_np, 1);
- __np = __next;
- }
-}
-
template <class _Tp, class _Hash, class _Equal, class _Alloc>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
__hash_table<_Tp, _Hash, _Equal, _Alloc>::__detach() _NOEXCEPT {
@@ -1318,7 +1319,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u,
max_load_factor() = __u.max_load_factor();
if (bucket_count() != 0) {
__next_pointer __cache = __detach();
- auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
+ auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
const_iterator __i = __u.begin();
while (__cache != nullptr && __u.size() != 0) {
__assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value()));
@@ -1353,7 +1354,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __
if (bucket_count() != 0) {
__next_pointer __cache = __detach();
- auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
+ auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
for (; __cache != nullptr && __first != __last; ++__first) {
__assign_value(__cache->__upcast()->__get_value(), *__first);
__next_pointer __next = __cache->__next_;
@@ -1374,7 +1375,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __f
"__assign_multi may only be called with the containers value type or the nodes value type");
if (bucket_count() != 0) {
__next_pointer __cache = __detach();
- auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
+ auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
for (; __cache != nullptr && __first != __last; ++__first) {
__assign_value(__cache->__upcast()->__get_value(), *__first);
__next_pointer __next = __cache->__next_;
@@ -1413,7 +1414,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT {
template <class _Tp, class _Hash, class _Equal, class _Alloc>
void __hash_table<_Tp, _Hash, _Equal, _Alloc>::clear() _NOEXCEPT {
if (size() > 0) {
- __deallocate_node(__first_node_.__next_);
+ __deallocate_node_list(__first_node_.__next_);
__first_node_.__next_ = nullptr;
size_type __bc = bucket_count();
for (size_type __i = 0; __i < __bc; ++__i)
@@ -1873,12 +1874,57 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) {
template <class _Tp, class _Hash, class _Equal, class _Alloc>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
__hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) {
- for (const_iterator __p = __first; __first != __last; __p = __first) {
- ++__first;
- erase(__p);
+ if (__first == __last)
+ return iterator(__last.__node_);
+
+ // current node
+ __next_pointer __current = __first.__node_;
+ size_type __bucket_count = bucket_count();
+ size_t __chash = std::__constrain_hash(__current->__hash(), __bucket_count);
+ // find previous node
+ __next_pointer __before_first = __bucket_list_[__chash];
+ for (; __before_first->__next_ != __current; __before_first = __before_first->__next_)
+ ;
+
+ __next_pointer __last_node = __last.__node_;
+
+ // If __before_first is in the same bucket (i.e. the first element we erase is not the first in the bucket), clear
+ // this bucket first without re-linking it
+ if (__before_first != __first_node_.__ptr() &&
+ std::__constrain_hash(__before_first->__hash(), __bucket_count) == __chash) {
+ while (__current != __last_node) {
+ if (auto __next_chash = std::__constrain_hash(__current->__hash(), __bucket_count); __next_chash != __chash) {
+ __chash = __next_chash;
+ break;
+ }
+ auto __next = __current->__next_;
+ __deallocate_node(__current->__upcast());
+ __current = __next;
+ --__size_;
+ }
}
- __next_pointer __np = __last.__node_;
- return iterator(__np);
+
+ while (__current != __last_node) {
+ auto __next = __current->__next_;
+ __deallocate_node(__current->__upcast());
+ __current = __next;
+ --__size_;
+
+ // When switching buckets, set the old bucket to be empty and update the next bucket to have __before_first as its
+ // before-first element
+ if (__next) {
+ if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) {
+ __bucket_list_[__chash] = nullptr;
+ __chash = __next_chash;
+ __bucket_list_[__chash] = __before_first;
+ }
+ }
+ }
+
+ // re-link __before_first with __last
+ __before_first->__next_ = __current;
+
+ return iterator(__last.__node_);
}
template <class _Tp, class _Hash, class _Equal, class _Alloc>
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
index f8a2bdd3fee73..a8a9f5fdbb428 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
@@ -91,6 +91,20 @@ int main(int, char**) {
assert(c.size() == 0);
assert(k == c.end());
}
+ { // Make sure we're properly destroying the elements when erasing
+ { // When erasing part of a bucket
+ std::unordered_multimap<int, std::string> map;
+ map.insert(std::make_pair(1, "This is a long string to make sure ASan can detect a memory leak"));
+ map.insert(std::make_pair(1, "This is another long string to make sure ASan can detect a memory leak"));
+ map.erase(++map.begin(), map.end());
+ }
+ { // When erasing the whole bucket
+ std::unordered_multimap<int, std::string> map;
+ map.insert(std::make_pair(1, "This is a long string to make sure ASan can detect a memory leak"));
+ map.insert(std::make_pair(1, "This is another long string to make sure ASan can detect a memory leak"));
+ map.erase(map.begin(), map.end());
+ }
+ }
#if TEST_STD_VER >= 11
{
typedef std::unordered_multimap<int,
|
@boomanaiden154 I've tried your reproducer, but it worked just fine on my machine. Could you share what exactly you're doing to get the reproducer? |
It was built with our internal build system, so I can't share an exact script you can just run. I just built the reproducer/libc++ with ASan, linked them together and got a failure when running the executable. Let me try and spend some time to see what's going on and get a setup that reproduces purely externally. Are we okay holding off landing this until I have a reproducer until sometime early next week (Monday/Tuesday)? |
Yeah, that's fine. I'm not in a rush. |
It looks like we use |
This reverts commit acc3a62.