Skip to content

Commit 4c5877d

Browse files
authored
[libc++] Optimize map::insert_or_assign (llvm#155816)
`__emplace_unique` uses `__find_equal`, which can be significantly faster than `lower_bound`. As a nice side-effect, this also changes the implementation to the "naive" implementation of trying `insert` first, and if that fails assign instead. This also matches the `insert_or_assign` overloads with a hint. ``` Zen 2: -------------------------------------------------------------------------------------------------------- Benchmark old new -------------------------------------------------------------------------------------------------------- std::map<int, int>::insert_or_assign(key, value) (already present)/0 1.62 ns 1.53 ns std::map<int, int>::insert_or_assign(key, value) (already present)/32 5.78 ns 5.99 ns std::map<int, int>::insert_or_assign(key, value) (already present)/1024 21.5 ns 15.4 ns std::map<int, int>::insert_or_assign(key, value) (already present)/8192 26.2 ns 20.5 ns std::map<int, int>::insert_or_assign(key, value) (new value)/0 22.5 ns 21.1 ns std::map<int, int>::insert_or_assign(key, value) (new value)/32 42.9 ns 28.4 ns std::map<int, int>::insert_or_assign(key, value) (new value)/1024 118 ns 92.0 ns std::map<int, int>::insert_or_assign(key, value) (new value)/8192 227 ns 173 ns std::map<std::string, int>::insert_or_assign(key, value) (already present)/0 13.2 ns 18.9 ns std::map<std::string, int>::insert_or_assign(key, value) (already present)/32 65.6 ns 39.0 ns std::map<std::string, int>::insert_or_assign(key, value) (already present)/1024 127 ns 64.4 ns std::map<std::string, int>::insert_or_assign(key, value) (already present)/8192 134 ns 71.4 ns std::map<std::string, int>::insert_or_assign(key, value) (new value)/0 45.6 ns 37.3 ns std::map<std::string, int>::insert_or_assign(key, value) (new value)/32 142 ns 93.3 ns std::map<std::string, int>::insert_or_assign(key, value) (new value)/1024 288 ns 147 ns std::map<std::string, int>::insert_or_assign(key, value) (new value)/8192 368 ns 182 ns Apple M4: -------------------------------------------------------------------------------------------------------- Benchmark old new -------------------------------------------------------------------------------------------------------- std::map<int, int>::insert_or_assign(key, value) (already present)/0 0.784 ns 0.740 ns std::map<int, int>::insert_or_assign(key, value) (already present)/32 2.52 ns 1.77 ns std::map<int, int>::insert_or_assign(key, value) (already present)/1024 8.72 ns 4.06 ns std::map<int, int>::insert_or_assign(key, value) (already present)/8192 10.6 ns 3.98 ns std::map<int, int>::insert_or_assign(key, value) (new value)/0 17.3 ns 17.2 ns std::map<int, int>::insert_or_assign(key, value) (new value)/32 22.5 ns 19.3 ns std::map<int, int>::insert_or_assign(key, value) (new value)/1024 56.8 ns 33.5 ns std::map<int, int>::insert_or_assign(key, value) (new value)/8192 88.2 ns 41.0 ns std::map<std::string, int>::insert_or_assign(key, value) (already present)/0 16.6 ns 11.8 ns std::map<std::string, int>::insert_or_assign(key, value) (already present)/32 13.7 ns 30.7 ns std::map<std::string, int>::insert_or_assign(key, value) (already present)/1024 46.7 ns 49.1 ns std::map<std::string, int>::insert_or_assign(key, value) (already present)/8192 41.9 ns 76.9 ns std::map<std::string, int>::insert_or_assign(key, value) (new value)/0 40.0 ns 40.5 ns std::map<std::string, int>::insert_or_assign(key, value) (new value)/32 38.9 ns 40.0 ns std::map<std::string, int>::insert_or_assign(key, value) (new value)/1024 84.9 ns 96.9 ns std::map<std::string, int>::insert_or_assign(key, value) (new value)/8192 166 ns 149 ns ```
1 parent c9d7d10 commit 4c5877d

File tree

3 files changed

+54
-12
lines changed

3 files changed

+54
-12
lines changed

libcxx/docs/ReleaseNotes/22.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Improvements and New Features
5757
has been improved by up to 3x
5858
- The performance of ``insert(iterator, iterator)`` of ``multimap`` and ``multiset`` has been improved by up to 2.5x
5959
- The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x
60+
- The performance of ``map::insert_or_assign`` has been improved by up to 2x
6061

6162
- ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying them
6263
in chunks into a buffer.

libcxx/include/map

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,22 +1144,20 @@ public:
11441144

11451145
template <class _Vp>
11461146
_LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(const key_type& __k, _Vp&& __v) {
1147-
iterator __p = lower_bound(__k);
1148-
if (__p != end() && !key_comp()(__k, __p->first)) {
1149-
__p->second = std::forward<_Vp>(__v);
1150-
return std::make_pair(__p, false);
1151-
}
1152-
return std::make_pair(emplace_hint(__p, __k, std::forward<_Vp>(__v)), true);
1147+
auto __result = __tree_.__emplace_unique(__k, std::forward<_Vp>(__v));
1148+
auto& [__iter, __inserted] = __result;
1149+
if (!__inserted)
1150+
__iter->second = std::forward<_Vp>(__v);
1151+
return __result;
11531152
}
11541153

11551154
template <class _Vp>
11561155
_LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(key_type&& __k, _Vp&& __v) {
1157-
iterator __p = lower_bound(__k);
1158-
if (__p != end() && !key_comp()(__k, __p->first)) {
1159-
__p->second = std::forward<_Vp>(__v);
1160-
return std::make_pair(__p, false);
1161-
}
1162-
return std::make_pair(emplace_hint(__p, std::move(__k), std::forward<_Vp>(__v)), true);
1156+
auto __result = __tree_.__emplace_unique(std::move(__k), std::forward<_Vp>(__v));
1157+
auto& [__iter, __inserted] = __result;
1158+
if (!__inserted)
1159+
__iter->second = std::forward<_Vp>(__v);
1160+
return __result;
11631161
}
11641162

11651163
template <class _Vp>

libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,49 @@ void associative_container_benchmarks(std::string container) {
259259
}
260260
});
261261

262+
if constexpr (is_map_like && !is_multi_key_container) {
263+
bench("insert_or_assign(key, value) (already present)", [=](auto& st) {
264+
const std::size_t size = st.range(0) ? st.range(0) : 1;
265+
std::vector<Value> in = make_value_types(generate_unique_keys(size));
266+
Value to_insert = in[in.size() / 2]; // pick any existing value
267+
std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
268+
typename Container::iterator inserted[BatchSize];
269+
270+
while (st.KeepRunningBatch(BatchSize)) {
271+
for (std::size_t i = 0; i != BatchSize; ++i) {
272+
inserted[i] =
273+
adapt_operations<Container>::get_iterator(c[i].insert_or_assign(to_insert.first, to_insert.second));
274+
benchmark::DoNotOptimize(inserted[i]);
275+
benchmark::DoNotOptimize(c[i]);
276+
benchmark::ClobberMemory();
277+
}
278+
}
279+
});
280+
281+
bench("insert_or_assign(key, value) (new value)", [=](auto& st) {
282+
const std::size_t size = st.range(0);
283+
std::vector<Value> in = make_value_types(generate_unique_keys(size + 1));
284+
Value to_insert = in.back();
285+
in.pop_back();
286+
std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
287+
288+
while (st.KeepRunningBatch(BatchSize)) {
289+
for (std::size_t i = 0; i != BatchSize; ++i) {
290+
auto result = c[i].insert_or_assign(to_insert.first, to_insert.second);
291+
benchmark::DoNotOptimize(result);
292+
benchmark::DoNotOptimize(c[i]);
293+
benchmark::ClobberMemory();
294+
}
295+
296+
st.PauseTiming();
297+
for (std::size_t i = 0; i != BatchSize; ++i) {
298+
c[i].erase(get_key(to_insert));
299+
}
300+
st.ResumeTiming();
301+
}
302+
});
303+
}
304+
262305
// The insert(hint, ...) methods are only relevant for ordered containers, and we lack
263306
// a good way to compute a hint for unordered ones.
264307
if constexpr (is_ordered_container) {

0 commit comments

Comments
 (0)