-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[libc++] Optimize copy construction and assignment of __tree #151304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
01b932d
to
651b570
Compare
@llvm/pr-subscribers-libcxx Author: Nikolas Klauser (philnik777) Changes
Full diff: https://github.com/llvm/llvm-project/pull/151304.diff 6 Files Affected:
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index 15bf46d44b07f..8b8dce5083149 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -43,6 +43,9 @@ Implemented Papers
Improvements and New Features
-----------------------------
+- The performance of ``map::map(const map&)`` has been improved up to 2.3x
+- The performance of ``map::operator=(const map&)`` has been improved by up to 11x
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__tree b/libcxx/include/__tree
index f8bb4f01b1e29..c1f667b7ec74f 100644
--- a/libcxx/include/__tree
+++ b/libcxx/include/__tree
@@ -1213,6 +1213,80 @@ private:
__node_pointer __cache_root_;
__node_pointer __cache_elem_;
};
+
+ class __tree_deleter {
+ __node_allocator& __alloc_;
+
+ public:
+ using pointer = __node_pointer;
+
+ _LIBCPP_HIDE_FROM_ABI __tree_deleter(__node_allocator& __alloc) : __alloc_(__alloc) {}
+
+ void operator()(__node_pointer __ptr) {
+ if (!__ptr)
+ return;
+
+ (*this)(static_cast<__node_pointer>(__ptr->__left_));
+
+ auto __right = __ptr->__right_;
+
+ __node_traits::destroy(__alloc_, std::addressof(__ptr->__value_));
+ __node_traits::deallocate(__alloc_, __ptr, 1);
+
+ (*this)(static_cast<__node_pointer>(__right));
+ }
+ };
+
+ _LIBCPP_HIDE_FROM_ABI __node_pointer __copy_construct_tree(__node_pointer __src) {
+ if (!__src)
+ return nullptr;
+
+ __node_holder __new_node = __construct_node(__src->__value_);
+
+ unique_ptr<__node, __tree_deleter> __left(
+ __copy_construct_tree(static_cast<__node_pointer>(__src->__left_)), __node_alloc_);
+ __node_pointer __right = __copy_construct_tree(static_cast<__node_pointer>(__src->__right_));
+
+ __node_pointer __new_node_ptr = __new_node.release();
+
+ __new_node_ptr->__is_black_ = __src->__is_black_;
+ __new_node_ptr->__left_ = static_cast<__node_base_pointer>(__left.release());
+ __new_node_ptr->__right_ = static_cast<__node_base_pointer>(__right);
+ if (__new_node_ptr->__left_)
+ __new_node_ptr->__left_->__parent_ = static_cast<__end_node_pointer>(__new_node_ptr);
+ if (__new_node_ptr->__right_)
+ __new_node_ptr->__right_->__parent_ = static_cast<__end_node_pointer>(__new_node_ptr);
+ return __new_node_ptr;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI __node_pointer __copy_assign_tree(__node_pointer __assign_to, __node_pointer __src) {
+ if (!__src) {
+ destroy(__assign_to);
+ return nullptr;
+ }
+
+ __assign_value(__assign_to->__value_, __src->__value_);
+
+ if (__assign_to->__left_) {
+ __assign_to->__left_ = static_cast<__node_base_pointer>(__copy_assign_tree(
+ static_cast<__node_pointer>(__assign_to->__left_), static_cast<__node_pointer>(__src->__left_)));
+ } else if (__src->__left_) {
+ auto __new_left = __copy_construct_tree(static_cast<__node_pointer>(__src->__left_));
+ __assign_to->__left_ = static_cast<__node_base_pointer>(__new_left);
+ __new_left->__parent_ = static_cast<__end_node_pointer>(__assign_to);
+ }
+
+ if (__assign_to->__right_) {
+ __assign_to->__right_ = static_cast<__node_base_pointer>(__copy_assign_tree(
+ static_cast<__node_pointer>(__assign_to->__right_), static_cast<__node_pointer>(__src->__right_)));
+ } else if (__src->__right_) {
+ auto __new_right = __copy_construct_tree(static_cast<__node_pointer>(__src->__right_));
+ __assign_to->__right_ = static_cast<__node_base_pointer>(__new_right);
+ __new_right->__parent_ = static_cast<__end_node_pointer>(__assign_to);
+ }
+
+ return __assign_to;
+ }
};
template <class _Tp, class _Compare, class _Allocator>
@@ -1277,11 +1351,28 @@ __tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_next(__node_poin
template <class _Tp, class _Compare, class _Allocator>
__tree<_Tp, _Compare, _Allocator>& __tree<_Tp, _Compare, _Allocator>::operator=(const __tree& __t) {
- if (this != std::addressof(__t)) {
- value_comp() = __t.value_comp();
- __copy_assign_alloc(__t);
- __assign_multi(__t.begin(), __t.end());
+ if (this == std::addressof(__t))
+ return *this;
+
+ value_comp() = __t.value_comp();
+ __copy_assign_alloc(__t);
+
+ if (__t.size() == 0) {
+ clear();
+ return *this;
+ }
+
+ if (__size_ != 0) {
+ __end_node_.__left_ = static_cast<__node_base_pointer>(__copy_assign_tree(
+ static_cast<__node_pointer>(__end_node_.__left_), static_cast<__node_pointer>(__t.__end_node_.__left_)));
+ } else {
+ __end_node_.__left_ =
+ static_cast<__node_base_pointer>(__copy_construct_tree(static_cast<__node_pointer>(__t.__end_node_.__left_)));
+ __end_node_.__left_->__parent_ = __end_node();
}
+ __begin_node_ = static_cast<__end_node_pointer>(std::__tree_min(static_cast<__node_base_pointer>(__end_node())));
+ __size_ = __t.__size_;
+
return *this;
}
@@ -1327,11 +1418,18 @@ void __tree<_Tp, _Compare, _Allocator>::__assign_multi(_InputIterator __first, _
template <class _Tp, class _Compare, class _Allocator>
__tree<_Tp, _Compare, _Allocator>::__tree(const __tree& __t)
- : __begin_node_(),
+ : __begin_node_(__end_node()),
__node_alloc_(__node_traits::select_on_container_copy_construction(__t.__node_alloc())),
__size_(0),
__value_comp_(__t.value_comp()) {
- __begin_node_ = __end_node();
+ if (__t.__size_ == 0)
+ return;
+
+ __end_node_.__left_ =
+ static_cast<__node_base_pointer>(__copy_construct_tree(static_cast<__node_pointer>(__t.__end_node_.__left_)));
+ __end_node_.__left_->__parent_ = __end_node();
+ __begin_node_ = static_cast<__end_node_pointer>(std::__tree_min(static_cast<__node_base_pointer>(__end_node())));
+ __size_ = __t.__size_;
}
template <class _Tp, class _Compare, class _Allocator>
@@ -1430,13 +1528,7 @@ __tree<_Tp, _Compare, _Allocator>::~__tree() {
template <class _Tp, class _Compare, class _Allocator>
void __tree<_Tp, _Compare, _Allocator>::destroy(__node_pointer __nd) _NOEXCEPT {
- if (__nd != nullptr) {
- destroy(static_cast<__node_pointer>(__nd->__left_));
- destroy(static_cast<__node_pointer>(__nd->__right_));
- __node_allocator& __na = __node_alloc();
- __node_traits::destroy(__na, std::addressof(__nd->__value_));
- __node_traits::deallocate(__na, __nd, 1);
- }
+ (__tree_deleter(__node_alloc_))(__nd);
}
template <class _Tp, class _Compare, class _Allocator>
diff --git a/libcxx/include/map b/libcxx/include/map
index 2251565801470..0a43bd09a0b16 100644
--- a/libcxx/include/map
+++ b/libcxx/include/map
@@ -970,7 +970,7 @@ public:
: map(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
# endif
- _LIBCPP_HIDE_FROM_ABI map(const map& __m) : __tree_(__m.__tree_) { insert(__m.begin(), __m.end()); }
+ _LIBCPP_HIDE_FROM_ABI map(const map& __m) = default;
_LIBCPP_HIDE_FROM_ABI map& operator=(const map& __m) = default;
@@ -1637,11 +1637,7 @@ public:
: multimap(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
# endif
- _LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m)
- : __tree_(__m.__tree_.value_comp(),
- __alloc_traits::select_on_container_copy_construction(__m.__tree_.__alloc())) {
- insert(__m.begin(), __m.end());
- }
+ _LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m) = default;
_LIBCPP_HIDE_FROM_ABI multimap& operator=(const multimap& __m) = default;
diff --git a/libcxx/include/set b/libcxx/include/set
index 1f2fd7fc91bbb..342a5294c814f 100644
--- a/libcxx/include/set
+++ b/libcxx/include/set
@@ -660,7 +660,7 @@ public:
: set(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
# endif
- _LIBCPP_HIDE_FROM_ABI set(const set& __s) : __tree_(__s.__tree_) { insert(__s.begin(), __s.end()); }
+ _LIBCPP_HIDE_FROM_ABI set(const set& __s) = default;
_LIBCPP_HIDE_FROM_ABI set& operator=(const set& __s) = default;
@@ -1119,11 +1119,7 @@ public:
: multiset(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
# endif
- _LIBCPP_HIDE_FROM_ABI multiset(const multiset& __s)
- : __tree_(__s.__tree_.value_comp(),
- __alloc_traits::select_on_container_copy_construction(__s.__tree_.__alloc())) {
- insert(__s.begin(), __s.end());
- }
+ _LIBCPP_HIDE_FROM_ABI multiset(const multiset& __s) = default;
_LIBCPP_HIDE_FROM_ABI multiset& operator=(const multiset& __s) = default;
diff --git a/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h b/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
index 0ff7f15164d8a..6cf2dcfc59e13 100644
--- a/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
+++ b/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
@@ -151,7 +151,7 @@ void associative_container_benchmarks(std::string container) {
/////////////////////////
// Assignment
/////////////////////////
- bench("operator=(const&)", [=](auto& st) {
+ bench("operator=(const&) (into cleared Container)", [=](auto& st) {
const std::size_t size = st.range(0);
std::vector<Value> in = make_value_types(generate_unique_keys(size));
Container src(in.begin(), in.end());
@@ -172,6 +172,21 @@ void associative_container_benchmarks(std::string container) {
}
});
+ bench("operator=(const&) (into populated Container)", [=](auto& st) {
+ const std::size_t size = st.range(0);
+ std::vector<Value> in = make_value_types(generate_unique_keys(size));
+ Container src(in.begin(), in.end());
+ Container c[BatchSize];
+
+ while (st.KeepRunningBatch(BatchSize)) {
+ for (std::size_t i = 0; i != BatchSize; ++i) {
+ c[i] = src;
+ benchmark::DoNotOptimize(c[i]);
+ benchmark::ClobberMemory();
+ }
+ }
+ });
+
/////////////////////////
// Insertion
/////////////////////////
diff --git a/libcxx/test/benchmarks/containers/associative/map.bench.cpp b/libcxx/test/benchmarks/containers/associative/map.bench.cpp
index cee669ae0a667..bd664dbb56ee7 100644
--- a/libcxx/test/benchmarks/containers/associative/map.bench.cpp
+++ b/libcxx/test/benchmarks/containers/associative/map.bench.cpp
@@ -29,6 +29,7 @@ struct support::adapt_operations<std::map<K, V>> {
int main(int argc, char** argv) {
support::associative_container_benchmarks<std::map<int, int>>("std::map<int, int>");
+ support::associative_container_benchmarks<std::map<std::string, int>>("std::map<std::string, int>");
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is amazing!
@@ -1213,6 +1213,80 @@ private: | |||
__node_pointer __cache_root_; | |||
__node_pointer __cache_elem_; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add a comment explaining what you learned about our __tree
implementation? This can be a followup NFC patch, but I think it would be useful because none of it is documented at the moment.
@@ -1277,11 +1351,28 @@ __tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_next(__node_poin | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a follow-up, I might clean up these __assign_multi
and __assign_unique
methods which seem a bit like dead weight now.
libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
Show resolved
Hide resolved
651b570
to
97e4cc5
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
libcxx/test/std/containers/associative/map/map.cons/copy.pass.cpp
Outdated
Show resolved
Hide resolved
libcxx/test/std/containers/associative/map/map.cons/copy.pass.cpp
Outdated
Show resolved
Hide resolved
libcxx/test/std/containers/associative/map/map.cons/copy_alloc.pass.cpp
Outdated
Show resolved
Hide resolved
libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp
Outdated
Show resolved
Hide resolved
13bdba4
to
88d4197
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks great, thanks! LGTM with a few last minor comments.
libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
Show resolved
Hide resolved
88d4197
to
cc5178c
Compare
cc5178c
to
da4f174
Compare
Looks like this PR broke ubsan: |
…lvm#151304)" This reverts commit 1cac2be.
This has been introduced by #151304. This problem is diagnosed by UBSan with optimizations enabled. Since we run UBSan only with optimizations disabled currently, this isn't caught in our CI. We should look into enabling UBSan with optimizations enabled to catch these sorts of issues before landing a patch.
This has been introduced by llvm#151304. This problem is diagnosed by UBSan with optimizations enabled. Since we run UBSan only with optimizations disabled currently, this isn't caught in our CI. We should look into enabling UBSan with optimizations enabled to catch these sorts of issues before landing a patch.
This has been introduced by #151304. This problem is diagnosed by UBSan with optimizations enabled. Since we run UBSan only with optimizations disabled currently, this isn't caught in our CI. We should look into enabling UBSan with optimizations enabled to catch these sorts of issues before landing a patch.
…1304) ``` ---------------------------------------------------------------------------------------------------------- Benchmark old new ---------------------------------------------------------------------------------------------------------- std::map<int, int>::ctor(const&)/0 15.5 ns 14.9 ns std::map<int, int>::ctor(const&)/32 474 ns 321 ns std::map<int, int>::ctor(const&)/1024 24591 ns 11101 ns std::map<int, int>::ctor(const&)/8192 236153 ns 98868 ns std::map<std::string, int>::ctor(const&)/0 15.2 ns 14.9 ns std::map<std::string, int>::ctor(const&)/32 2673 ns 2340 ns std::map<std::string, int>::ctor(const&)/1024 115354 ns 86088 ns std::map<std::string, int>::ctor(const&)/8192 1298510 ns 626876 ns std::map<int, int>::operator=(const&) (into cleared Container)/0 16.5 ns 16.1 ns std::map<int, int>::operator=(const&) (into cleared Container)/32 548 ns 323 ns std::map<int, int>::operator=(const&) (into cleared Container)/1024 28418 ns 11026 ns std::map<int, int>::operator=(const&) (into cleared Container)/8192 281827 ns 97113 ns std::map<int, int>::operator=(const&) (into populated Container)/0 2.42 ns 1.85 ns std::map<int, int>::operator=(const&) (into populated Container)/32 369 ns 73.0 ns std::map<int, int>::operator=(const&) (into populated Container)/1024 24078 ns 2322 ns std::map<int, int>::operator=(const&) (into populated Container)/8192 266537 ns 22963 ns std::map<std::string, int>::operator=(const&) (into cleared Container)/0 16.6 ns 16.2 ns std::map<std::string, int>::operator=(const&) (into cleared Container)/32 2614 ns 1622 ns std::map<std::string, int>::operator=(const&) (into cleared Container)/1024 116826 ns 63281 ns std::map<std::string, int>::operator=(const&) (into cleared Container)/8192 1316655 ns 649177 ns std::map<std::string, int>::operator=(const&) (into populated Container)/0 2.42 ns 1.89 ns std::map<std::string, int>::operator=(const&) (into populated Container)/32 1264 ns 581 ns std::map<std::string, int>::operator=(const&) (into populated Container)/1024 238826 ns 39943 ns std::map<std::string, int>::operator=(const&) (into populated Container)/8192 2412327 ns 379456 ns ``` Fixes llvm#77658 Fixes llvm#62571
This has been introduced by llvm#151304. This problem is diagnosed by UBSan with optimizations enabled. Since we run UBSan only with optimizations disabled currently, this isn't caught in our CI. We should look into enabling UBSan with optimizations enabled to catch these sorts of issues before landing a patch.
Fixes #77658
Fixes #62571