Skip to content

Commit 6438065

Browse files
philnik777krishna2803
authored andcommitted
[libc++] Optimize copy construction and assignment of __tree (llvm#151304)
``` ---------------------------------------------------------------------------------------------------------- Benchmark old new ---------------------------------------------------------------------------------------------------------- std::map<int, int>::ctor(const&)/0 15.5 ns 14.9 ns std::map<int, int>::ctor(const&)/32 474 ns 321 ns std::map<int, int>::ctor(const&)/1024 24591 ns 11101 ns std::map<int, int>::ctor(const&)/8192 236153 ns 98868 ns std::map<std::string, int>::ctor(const&)/0 15.2 ns 14.9 ns std::map<std::string, int>::ctor(const&)/32 2673 ns 2340 ns std::map<std::string, int>::ctor(const&)/1024 115354 ns 86088 ns std::map<std::string, int>::ctor(const&)/8192 1298510 ns 626876 ns std::map<int, int>::operator=(const&) (into cleared Container)/0 16.5 ns 16.1 ns std::map<int, int>::operator=(const&) (into cleared Container)/32 548 ns 323 ns std::map<int, int>::operator=(const&) (into cleared Container)/1024 28418 ns 11026 ns std::map<int, int>::operator=(const&) (into cleared Container)/8192 281827 ns 97113 ns std::map<int, int>::operator=(const&) (into populated Container)/0 2.42 ns 1.85 ns std::map<int, int>::operator=(const&) (into populated Container)/32 369 ns 73.0 ns std::map<int, int>::operator=(const&) (into populated Container)/1024 24078 ns 2322 ns std::map<int, int>::operator=(const&) (into populated Container)/8192 266537 ns 22963 ns std::map<std::string, int>::operator=(const&) (into cleared Container)/0 16.6 ns 16.2 ns std::map<std::string, int>::operator=(const&) (into cleared Container)/32 2614 ns 1622 ns std::map<std::string, int>::operator=(const&) (into cleared Container)/1024 116826 ns 63281 ns std::map<std::string, int>::operator=(const&) (into cleared Container)/8192 1316655 ns 649177 ns std::map<std::string, int>::operator=(const&) (into populated Container)/0 2.42 ns 1.89 ns std::map<std::string, int>::operator=(const&) (into populated Container)/32 1264 ns 581 ns std::map<std::string, int>::operator=(const&) (into populated Container)/1024 238826 ns 39943 ns std::map<std::string, int>::operator=(const&) (into populated Container)/8192 2412327 ns 379456 ns ``` Fixes llvm#77658 Fixes llvm#62571
1 parent e91eb43 commit 6438065

File tree

22 files changed

+1984
-1029
lines changed

22 files changed

+1984
-1029
lines changed

libcxx/docs/ReleaseNotes/22.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ Implemented Papers
4343
Improvements and New Features
4444
-----------------------------
4545

46+
- The performance of ``map::map(const map&)`` has been improved up to 2.3x
47+
- The performance of ``map::operator=(const map&)`` has been improved by up to 11x
48+
4649
Deprecations and Removals
4750
-------------------------
4851

libcxx/include/__tree

Lines changed: 122 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,6 +1213,104 @@ private:
12131213
__node_pointer __cache_root_;
12141214
__node_pointer __cache_elem_;
12151215
};
1216+
1217+
class __tree_deleter {
1218+
__node_allocator& __alloc_;
1219+
1220+
public:
1221+
using pointer = __node_pointer;
1222+
1223+
_LIBCPP_HIDE_FROM_ABI __tree_deleter(__node_allocator& __alloc) : __alloc_(__alloc) {}
1224+
1225+
#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function
1226+
_LIBCPP_HIDE_FROM_ABI
1227+
#endif
1228+
void
1229+
operator()(__node_pointer __ptr) {
1230+
if (!__ptr)
1231+
return;
1232+
1233+
(*this)(static_cast<__node_pointer>(__ptr->__left_));
1234+
1235+
auto __right = __ptr->__right_;
1236+
1237+
__node_traits::destroy(__alloc_, std::addressof(__ptr->__value_));
1238+
__node_traits::deallocate(__alloc_, __ptr, 1);
1239+
1240+
(*this)(static_cast<__node_pointer>(__right));
1241+
}
1242+
};
1243+
1244+
// This copy construction will always produce a correct red-black-tree assuming the incoming tree is correct, since we
1245+
// copy the exact structure 1:1. Since this is for copy construction _only_ we know that we get a correct tree. If we
1246+
// didn't get a correct tree, the invariants of __tree are broken and we have a much bigger problem than an improperly
1247+
// balanced tree.
1248+
#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function
1249+
_LIBCPP_HIDE_FROM_ABI
1250+
#endif
1251+
__node_pointer
1252+
__copy_construct_tree(__node_pointer __src) {
1253+
if (!__src)
1254+
return nullptr;
1255+
1256+
__node_holder __new_node = __construct_node(__src->__value_);
1257+
1258+
unique_ptr<__node, __tree_deleter> __left(
1259+
__copy_construct_tree(static_cast<__node_pointer>(__src->__left_)), __node_alloc_);
1260+
__node_pointer __right = __copy_construct_tree(static_cast<__node_pointer>(__src->__right_));
1261+
1262+
__node_pointer __new_node_ptr = __new_node.release();
1263+
1264+
__new_node_ptr->__is_black_ = __src->__is_black_;
1265+
__new_node_ptr->__left_ = static_cast<__node_base_pointer>(__left.release());
1266+
__new_node_ptr->__right_ = static_cast<__node_base_pointer>(__right);
1267+
if (__new_node_ptr->__left_)
1268+
__new_node_ptr->__left_->__parent_ = static_cast<__end_node_pointer>(__new_node_ptr);
1269+
if (__new_node_ptr->__right_)
1270+
__new_node_ptr->__right_->__parent_ = static_cast<__end_node_pointer>(__new_node_ptr);
1271+
return __new_node_ptr;
1272+
}
1273+
1274+
// This copy assignment will always produce a correct red-black-tree assuming the incoming tree is correct, since our
1275+
// own tree is a red-black-tree and the incoming tree is a red-black-tree. The invariants of a red-black-tree are
1276+
// temporarily not met until all of the incoming red-black tree is copied.
1277+
#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function
1278+
_LIBCPP_HIDE_FROM_ABI
1279+
#endif
1280+
__node_pointer
1281+
__copy_assign_tree(__node_pointer __dest, __node_pointer __src) {
1282+
if (!__src) {
1283+
destroy(__dest);
1284+
return nullptr;
1285+
}
1286+
1287+
__assign_value(__dest->__value_, __src->__value_);
1288+
__dest->__is_black_ = __src->__is_black_;
1289+
1290+
// If we already have a left node in the destination tree, reuse it and copy-assign recursively
1291+
if (__dest->__left_) {
1292+
__dest->__left_ = static_cast<__node_base_pointer>(__copy_assign_tree(
1293+
static_cast<__node_pointer>(__dest->__left_), static_cast<__node_pointer>(__src->__left_)));
1294+
1295+
// Otherwise, we must create new nodes; copy-construct from here on
1296+
} else if (__src->__left_) {
1297+
auto __new_left = __copy_construct_tree(static_cast<__node_pointer>(__src->__left_));
1298+
__dest->__left_ = static_cast<__node_base_pointer>(__new_left);
1299+
__new_left->__parent_ = static_cast<__end_node_pointer>(__dest);
1300+
}
1301+
1302+
// Identical to the left case above, just for the right nodes
1303+
if (__dest->__right_) {
1304+
__dest->__right_ = static_cast<__node_base_pointer>(__copy_assign_tree(
1305+
static_cast<__node_pointer>(__dest->__right_), static_cast<__node_pointer>(__src->__right_)));
1306+
} else if (__src->__right_) {
1307+
auto __new_right = __copy_construct_tree(static_cast<__node_pointer>(__src->__right_));
1308+
__dest->__right_ = static_cast<__node_base_pointer>(__new_right);
1309+
__new_right->__parent_ = static_cast<__end_node_pointer>(__dest);
1310+
}
1311+
1312+
return __dest;
1313+
}
12161314
};
12171315

12181316
template <class _Tp, class _Compare, class _Allocator>
@@ -1277,11 +1375,22 @@ __tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_next(__node_poin
12771375

12781376
template <class _Tp, class _Compare, class _Allocator>
12791377
__tree<_Tp, _Compare, _Allocator>& __tree<_Tp, _Compare, _Allocator>::operator=(const __tree& __t) {
1280-
if (this != std::addressof(__t)) {
1281-
value_comp() = __t.value_comp();
1282-
__copy_assign_alloc(__t);
1283-
__assign_multi(__t.begin(), __t.end());
1378+
if (this == std::addressof(__t))
1379+
return *this;
1380+
1381+
value_comp() = __t.value_comp();
1382+
__copy_assign_alloc(__t);
1383+
1384+
if (__size_ != 0) {
1385+
*__root_ptr() = static_cast<__node_base_pointer>(__copy_assign_tree(__root(), __t.__root()));
1386+
} else {
1387+
*__root_ptr() = static_cast<__node_base_pointer>(__copy_construct_tree(__t.__root()));
1388+
if (__root())
1389+
__root()->__parent_ = __end_node();
12841390
}
1391+
__begin_node_ = static_cast<__end_node_pointer>(std::__tree_min(static_cast<__node_base_pointer>(__end_node())));
1392+
__size_ = __t.size();
1393+
12851394
return *this;
12861395
}
12871396

@@ -1327,11 +1436,17 @@ void __tree<_Tp, _Compare, _Allocator>::__assign_multi(_InputIterator __first, _
13271436

13281437
template <class _Tp, class _Compare, class _Allocator>
13291438
__tree<_Tp, _Compare, _Allocator>::__tree(const __tree& __t)
1330-
: __begin_node_(),
1439+
: __begin_node_(__end_node()),
13311440
__node_alloc_(__node_traits::select_on_container_copy_construction(__t.__node_alloc())),
13321441
__size_(0),
13331442
__value_comp_(__t.value_comp()) {
1334-
__begin_node_ = __end_node();
1443+
if (__t.size() == 0)
1444+
return;
1445+
1446+
*__root_ptr() = static_cast<__node_base_pointer>(__copy_construct_tree(__t.__root()));
1447+
__root()->__parent_ = __end_node();
1448+
__begin_node_ = static_cast<__end_node_pointer>(std::__tree_min(static_cast<__node_base_pointer>(__end_node())));
1449+
__size_ = __t.size();
13351450
}
13361451

13371452
template <class _Tp, class _Compare, class _Allocator>
@@ -1430,13 +1545,7 @@ __tree<_Tp, _Compare, _Allocator>::~__tree() {
14301545

14311546
template <class _Tp, class _Compare, class _Allocator>
14321547
void __tree<_Tp, _Compare, _Allocator>::destroy(__node_pointer __nd) _NOEXCEPT {
1433-
if (__nd != nullptr) {
1434-
destroy(static_cast<__node_pointer>(__nd->__left_));
1435-
destroy(static_cast<__node_pointer>(__nd->__right_));
1436-
__node_allocator& __na = __node_alloc();
1437-
__node_traits::destroy(__na, std::addressof(__nd->__value_));
1438-
__node_traits::deallocate(__na, __nd, 1);
1439-
}
1548+
(__tree_deleter(__node_alloc_))(__nd);
14401549
}
14411550

14421551
template <class _Tp, class _Compare, class _Allocator>

libcxx/include/map

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ public:
970970
: map(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
971971
# endif
972972

973-
_LIBCPP_HIDE_FROM_ABI map(const map& __m) : __tree_(__m.__tree_) { insert(__m.begin(), __m.end()); }
973+
_LIBCPP_HIDE_FROM_ABI map(const map& __m) = default;
974974

975975
_LIBCPP_HIDE_FROM_ABI map& operator=(const map& __m) = default;
976976

@@ -1637,11 +1637,7 @@ public:
16371637
: multimap(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
16381638
# endif
16391639

1640-
_LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m)
1641-
: __tree_(__m.__tree_.value_comp(),
1642-
__alloc_traits::select_on_container_copy_construction(__m.__tree_.__alloc())) {
1643-
insert(__m.begin(), __m.end());
1644-
}
1640+
_LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m) = default;
16451641

16461642
_LIBCPP_HIDE_FROM_ABI multimap& operator=(const multimap& __m) = default;
16471643

libcxx/include/set

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ public:
660660
: set(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
661661
# endif
662662

663-
_LIBCPP_HIDE_FROM_ABI set(const set& __s) : __tree_(__s.__tree_) { insert(__s.begin(), __s.end()); }
663+
_LIBCPP_HIDE_FROM_ABI set(const set& __s) = default;
664664

665665
_LIBCPP_HIDE_FROM_ABI set& operator=(const set& __s) = default;
666666

@@ -1119,11 +1119,7 @@ public:
11191119
: multiset(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
11201120
# endif
11211121

1122-
_LIBCPP_HIDE_FROM_ABI multiset(const multiset& __s)
1123-
: __tree_(__s.__tree_.value_comp(),
1124-
__alloc_traits::select_on_container_copy_construction(__s.__tree_.__alloc())) {
1125-
insert(__s.begin(), __s.end());
1126-
}
1122+
_LIBCPP_HIDE_FROM_ABI multiset(const multiset& __s) = default;
11271123

11281124
_LIBCPP_HIDE_FROM_ABI multiset& operator=(const multiset& __s) = default;
11291125

libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ void associative_container_benchmarks(std::string container) {
151151
/////////////////////////
152152
// Assignment
153153
/////////////////////////
154-
bench("operator=(const&)", [=](auto& st) {
154+
bench("operator=(const&) (into cleared Container)", [=](auto& st) {
155155
const std::size_t size = st.range(0);
156156
std::vector<Value> in = make_value_types(generate_unique_keys(size));
157157
Container src(in.begin(), in.end());
@@ -172,6 +172,42 @@ void associative_container_benchmarks(std::string container) {
172172
}
173173
});
174174

175+
bench("operator=(const&) (into partially populated Container)", [=](auto& st) {
176+
const std::size_t size = st.range(0);
177+
std::vector<Value> in = make_value_types(generate_unique_keys(size));
178+
Container src(in.begin(), in.end());
179+
Container c[BatchSize];
180+
181+
while (st.KeepRunningBatch(BatchSize)) {
182+
for (std::size_t i = 0; i != BatchSize; ++i) {
183+
c[i] = src;
184+
benchmark::DoNotOptimize(c[i]);
185+
benchmark::ClobberMemory();
186+
}
187+
188+
st.PauseTiming();
189+
for (std::size_t i = 0; i != BatchSize; ++i) {
190+
c[i].clear();
191+
}
192+
st.ResumeTiming();
193+
}
194+
});
195+
196+
bench("operator=(const&) (into populated Container)", [=](auto& st) {
197+
const std::size_t size = st.range(0);
198+
std::vector<Value> in = make_value_types(generate_unique_keys(size));
199+
Container src(in.begin(), in.end());
200+
Container c[BatchSize];
201+
202+
while (st.KeepRunningBatch(BatchSize)) {
203+
for (std::size_t i = 0; i != BatchSize; ++i) {
204+
c[i] = src;
205+
benchmark::DoNotOptimize(c[i]);
206+
benchmark::ClobberMemory();
207+
}
208+
}
209+
});
210+
175211
/////////////////////////
176212
// Insertion
177213
/////////////////////////

libcxx/test/benchmarks/containers/associative/map.bench.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ struct support::adapt_operations<std::map<K, V>> {
2929

3030
int main(int argc, char** argv) {
3131
support::associative_container_benchmarks<std::map<int, int>>("std::map<int, int>");
32+
support::associative_container_benchmarks<std::map<std::string, int>>("std::map<std::string, int>");
3233

3334
benchmark::Initialize(&argc, argv);
3435
benchmark::RunSpecifiedBenchmarks();

libcxx/test/benchmarks/containers/associative/multimap.bench.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ struct support::adapt_operations<std::multimap<K, V>> {
2828

2929
int main(int argc, char** argv) {
3030
support::associative_container_benchmarks<std::multimap<int, int>>("std::multimap<int, int>");
31+
support::associative_container_benchmarks<std::multimap<std::string, int>>("std::multimap<std::string, int>");
3132

3233
benchmark::Initialize(&argc, argv);
3334
benchmark::RunSpecifiedBenchmarks();

libcxx/test/benchmarks/containers/associative/multiset.bench.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ struct support::adapt_operations<std::multiset<K>> {
2626

2727
int main(int argc, char** argv) {
2828
support::associative_container_benchmarks<std::multiset<int>>("std::multiset<int>");
29+
support::associative_container_benchmarks<std::multiset<std::string>>("std::multiset<std::string>");
2930

3031
benchmark::Initialize(&argc, argv);
3132
benchmark::RunSpecifiedBenchmarks();

libcxx/test/benchmarks/containers/associative/set.bench.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct support::adapt_operations<std::set<K>> {
2727

2828
int main(int argc, char** argv) {
2929
support::associative_container_benchmarks<std::set<int>>("std::set<int>");
30+
support::associative_container_benchmarks<std::set<std::string>>("std::set<std::string>");
3031

3132
benchmark::Initialize(&argc, argv);
3233
benchmark::RunSpecifiedBenchmarks();

libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,11 @@ void deque_test() {
324324

325325
void map_test() {
326326
std::map<int, int> i_am_empty{};
327+
328+
// Make __tree_itertor available in the debug info
329+
// FIXME: Is there any way to avoid this requirement?
330+
(void)i_am_empty.begin();
331+
327332
ComparePrettyPrintToChars(i_am_empty, "std::map is empty");
328333

329334
std::map<int, std::string> one_two_three;

0 commit comments

Comments
 (0)