Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions libcxx/docs/ReleaseNotes/20.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@ Improvements and New Features
- The ``_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY`` ABI configuration was added, which allows storing valid bounds
in ``std::array::iterator`` and detecting OOB accesses when the appropriate hardening mode is enabled.

- The input iterator overload of ``assign(_InputIterator, _InputIterator)`` in ``std::vector<_Tp, _Allocator>`` has been
optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., ``std::vector<int>``),
and up to 3.4x for non-trivial element types (e.g., ``std::vector<std::vector<int>>``).
- The ``input_iterator``-pair overload of ``void assign(InputIt, InputIt)`` has been optimized for ``std::vector``,
resulting in a performance improvement of up to 2x for trivial element types (e.g., ``std::vector<int>``), and up
to 3.4x for non-trivial element types (e.g., ``std::vector<std::vector<int>>``).

- The ``input_iterator``-pair overload of ``iterator insert(const_iterator, InputIt, InputIt)`` has been optimized
for ``std::vector``, resulting in a performance improvement of up to 10x for ``std::vector<int>``, and up to 2.3x
for ``std::vector<std::vector<int>>``.

- On Windows, ``<system_error>``'s ``std::system_category`` is now distinct from ``std::generic_category``. The behavior
on other operating systems is unchanged.
Expand Down
43 changes: 21 additions & 22 deletions libcxx/include/__vector/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -1250,30 +1250,29 @@ vector<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __position, _Inpu
difference_type __off = __position - begin();
pointer __p = this->__begin_ + __off;
pointer __old_last = this->__end_;
for (; this->__end_ != this->__cap_ && __first != __last; ++__first) {
for (; this->__end_ != this->__cap_ && __first != __last; ++__first)
__construct_one_at_end(*__first);

if (__first == __last)
(void)std::rotate(__p, __old_last, this->__end_);
else {
__split_buffer<value_type, allocator_type&> __v(__alloc_);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we could have a heuristic of some kind here, we could pre-allocate __v to a given size and hope that we get lucky. That way, we might not have to reallocate memory for __v. Without a heuristic though, I think allocating the smallest amount of memory (what you have right now) is probably the best approach. It's worth thinking about whether such a heuristic exists.

Suggested change
__split_buffer<value_type, allocator_type&> __v(__alloc_);
__split_buffer<value_type, allocator_type&> __v(__alloc_);
__v.reserve(__recommend(capacity() + heuristic-maybe));
// now if we got lucky, perhaps __v.capacity() is enough to hold (size() + (last-first)) elements

auto __guard = std::__make_exception_guard(
_AllocatorDestroyRangeReverse<allocator_type, pointer>(__alloc_, __old_last, this->__end_));
__v.__construct_at_end_with_sentinel(std::move(__first), std::move(__last));
__split_buffer<value_type, allocator_type&> __merged(__recommend(size() + __v.size()), __off, __alloc_);
std::__uninitialized_allocator_relocate(
__alloc_, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.__end_));
__guard.__complete(); // Release the guard once objects in [__old_last_, __end_) have been successfully relocated.
__merged.__end_ += this->__end_ - __old_last;
this->__end_ = __old_last;
std::__uninitialized_allocator_relocate(
__alloc_, std::__to_address(__v.__begin_), std::__to_address(__v.__end_), std::__to_address(__merged.__end_));
__merged.__end_ += __v.size();
__v.__end_ = __v.__begin_;
__p = __swap_out_circular_buffer(__merged, __p);
}
__split_buffer<value_type, allocator_type&> __v(this->__alloc_);
if (__first != __last) {
#if _LIBCPP_HAS_EXCEPTIONS
try {
#endif // _LIBCPP_HAS_EXCEPTIONS
__v.__construct_at_end_with_sentinel(std::move(__first), std::move(__last));
difference_type __old_size = __old_last - this->__begin_;
difference_type __old_p = __p - this->__begin_;
reserve(__recommend(size() + __v.size()));
__p = this->__begin_ + __old_p;
__old_last = this->__begin_ + __old_size;
#if _LIBCPP_HAS_EXCEPTIONS
} catch (...) {
erase(__make_iter(__old_last), end());
throw;
}
#endif // _LIBCPP_HAS_EXCEPTIONS
}
__p = std::rotate(__p, __old_last, this->__end_);
insert(__make_iter(__p), std::make_move_iterator(__v.begin()), std::make_move_iterator(__v.end()));
return begin() + __off;
return __make_iter(__p);
}

template <class _Tp, class _Allocator>
Expand Down
9 changes: 8 additions & 1 deletion libcxx/test/benchmarks/GenerateInput.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,14 @@ std::vector<std::vector<IntT>> getRandomIntegerInputsWithLength(std::size_t N, s
return inputs;
}

inline std::vector<std::string> getPrefixedRandomStringInputs(std::size_t N) {
// Builds N random strings of length 10 each. The length is kept short so the
// strings are expected to use the small-string optimization (the original
// code marks this call with an `SSO` note).
//
// N - number of strings to generate.
//
// Returns a vector holding the N generated strings, in generation order.
inline std::vector<std::string> getSSORandomStringInputs(size_t N) {
  std::vector<std::string> inputs;
  // The final size is known up front, so allocate once instead of letting
  // push_back grow the buffer repeatedly (matches getPrefixedRandomStringInputs).
  inputs.reserve(N);
  for (size_t i = 0; i < N; ++i)
    inputs.push_back(getRandomString(10)); // SSO
  return inputs;
}

inline std::vector<std::string> getPrefixedRandomStringInputs(size_t N) {
std::vector<std::string> inputs;
inputs.reserve(N);
constexpr int kSuffixLength = 32;
Expand Down
60 changes: 60 additions & 0 deletions libcxx/test/benchmarks/containers/ContainerBenchmarks.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,66 @@ void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
}
}

// Benchmarks inserting a range at the front of a container through
// cpp17_input_iterator when the container's capacity has been reserved in
// advance for the existing elements plus the whole input.
//
// st  - benchmark state; st.range(0) is forwarded to `gen`.
// c   - container to insert into; it is trimmed back to its initial size
//       after every iteration.
// gen - callable producing the elements to insert.
template <class Container, class GenInputs>
void BM_Insert_InputIterIter_NoRealloc(benchmark::State& st, Container c, GenInputs gen) {
  auto input = gen(st.range(0));
  DoNotOptimizeData(input);
  const auto initial_size = c.size();
  const auto first        = cpp17_input_iterator(input.begin());
  const auto last         = cpp17_input_iterator(input.end());
  // Reserve room for the whole input up front so insert() has no need to reallocate.
  c.reserve(initial_size + input.size());
  for (auto _ : st) {
    auto inserted = c.insert(c.begin(), first, last);
    benchmark::DoNotOptimize(&(*inserted));
    // Untimed cleanup: drop everything past the initial size so the container
    // does not keep growing from one iteration to the next.
    st.PauseTiming();
    c.erase(c.begin() + initial_size, c.end());
    st.ResumeTiming();
    DoNotOptimizeData(c);
    benchmark::ClobberMemory();
  }
}

// Benchmarks inserting a range at the front of a container through
// cpp17_input_iterator when the container was reserved to twice the requested
// size before receiving its contents, and the input is generated with
// `size + 10` as the argument to `gen`.
//
// st  - benchmark state; st.range(0) is the base size.
// gen - callable producing both the baseline contents and the input range.
// The unnamed Container parameter only selects the container type.
template <class Container, class GenInputs>
void BM_Insert_InputIterIter_Realloc_HalfFilled(benchmark::State& st, Container, GenInputs gen) {
  const auto base_size = st.range(0);
  Container baseline   = gen(base_size);
  Container input      = gen(base_size + 10);
  DoNotOptimizeData(baseline);
  DoNotOptimizeData(input);
  const auto first = cpp17_input_iterator(input.begin());
  const auto last  = cpp17_input_iterator(input.end());
  for (auto _ : st) {
    // Untimed setup: rebuild a fresh container for every iteration.
    st.PauseTiming();
    Container target;
    // Intended scenario (per the original note): a half-filled container whose
    // insert must reallocate -- presumably gen(k) yields k elements; confirm.
    target.reserve(base_size * 2);
    target = baseline;
    st.ResumeTiming();
    auto inserted = target.insert(target.begin(), first, last);
    benchmark::DoNotOptimize(&(*inserted));
    DoNotOptimizeData(target);
    benchmark::ClobberMemory();
  }
}

// Benchmarks inserting a range at the front of a container through
// cpp17_input_iterator when the container was reserved to `size + 5` before
// receiving its contents, and the input is generated with 10 as the argument
// to `gen`.
//
// st  - benchmark state; st.range(0) is the base size.
// gen - callable producing both the baseline contents and the input range.
// The unnamed Container parameter only selects the container type.
template <class Container, class GenInputs>
void BM_Insert_InputIterIter_Realloc_NearFull(benchmark::State& st, Container, GenInputs gen) {
  const auto base_size = st.range(0);
  Container baseline   = gen(base_size);
  Container input      = gen(10);
  DoNotOptimizeData(baseline);
  DoNotOptimizeData(input);
  const auto first = cpp17_input_iterator(input.begin());
  const auto last  = cpp17_input_iterator(input.end());
  for (auto _ : st) {
    // Untimed setup: rebuild a fresh container for every iteration.
    st.PauseTiming();
    Container target;
    // Intended scenario (per the original note): an almost-full container whose
    // insert must reallocate -- presumably gen(k) yields k elements; confirm.
    target.reserve(base_size + 5);
    target = baseline;
    st.ResumeTiming();
    auto inserted = target.insert(target.begin(), first, last);
    benchmark::DoNotOptimize(&(*inserted));
    DoNotOptimizeData(target);
    benchmark::ClobberMemory();
  }
}

template <class Container, class GenInputs>
void BM_InsertDuplicate(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
Expand Down
14 changes: 14 additions & 0 deletions libcxx/test/benchmarks/containers/vector_operations.bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,18 @@ BENCHMARK_CAPTURE(BM_AssignInputIterIter<100>,
getRandomIntegerInputsWithLength<int>)
->Args({TestNumInputs, TestNumInputs});

// Registrations for the input-iterator range-insert benchmarks. Every case is
// run with a single argument, 514048, which the benchmark bodies read through
// st.range(0).

// std::vector<int> pre-filled with 100 copies of 1; capacity is reserved by the
// benchmark before the timed loop.
BENCHMARK_CAPTURE(BM_Insert_InputIterIter_NoRealloc, vector_int, std::vector<int>(100, 1), getRandomIntegerInputs<int>)
->Arg(514048);
// std::vector<int>, container reserved to twice the base size before each insert.
BENCHMARK_CAPTURE(
BM_Insert_InputIterIter_Realloc_HalfFilled, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
->Arg(514048);
// std::vector<int>, container reserved to base size + 5 before each insert.
BENCHMARK_CAPTURE(BM_Insert_InputIterIter_Realloc_NearFull, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
->Arg(514048);
// std::vector<std::string> variants of the two reallocation cases, fed by
// getSSORandomStringInputs.
BENCHMARK_CAPTURE(
BM_Insert_InputIterIter_Realloc_HalfFilled, vector_string, std::vector<std::string>{}, getSSORandomStringInputs)
->Arg(514048);
BENCHMARK_CAPTURE(
BM_Insert_InputIterIter_Realloc_NearFull, vector_string, std::vector<std::string>{}, getSSORandomStringInputs)
->Arg(514048);

BENCHMARK_MAIN();
Loading
Loading