Skip to content

Commit 32987d5

Browse files
committed
Merge #12549: Make prevector::resize() and other prevector operations much faster
5aad635 Use memset() to optimize prevector::resize() (Evan Klitzke) e46be25 Reduce redundant code of prevector and speed it up (Akio Nakamura) f0e7aa7 Add new prevector benchmarks. (Evan Klitzke) Pull request description: This branch optimizes various `prevector` operations, especially resizing vectors. While profiling the `loadblk` thread I noticed that a lot of time was being spent in `prevector::resize()` which led to this work. I have some data here indicating that it takes up **37%** of the time in `ReadBlockFromDisk()`: https://monad.io/readblockfromdisk.svg This branch improves things significantly. For trivial types, the new results for the prevector benchmark are: * `PrevectorClearTrivial` which tests `prevector::clear()` becomes 24.6x faster * `PrevectorDestructorTrivial` which tests `prevector::~prevector()` becomes 20.5x faster * `PrevectorResizeTrivial` which tests `prevector::resize()` becomes 20.3x faster Note that in practice it looks like the prevector is only used to contain `unsigned char` types, which is a trivial type. The benchmarks are testing a bit of an extreme case, but the changes here are motivated by the profiling data for `ReadBlockFromDisk()` I linked to above. The pull request here consists of a series of three commits: * The first adds new benchmarks but does not change the prevector code. * The second is from @AkioNak , and merges some prevector optimizations he submitted in #11988 * The third optimizes `prevector::resize()` to use `memset()` when the prevector contains trivially constructible types Tree-SHA512: 28f7cbb91a19f9f43b6a5942781d7eb2e3197389186b666f086b69df12bee37773140f765426d715bfb8ebff79cb27a5f1206d0325b54b4aa65598b50fb18368
2 parents 9e2ed25 + 5aad635 commit 32987d5

File tree

5 files changed

+151
-88
lines changed

5 files changed

+151
-88
lines changed

src/Makefile.bench.include

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ bench_bench_bitcoin_SOURCES = \
2727
bench/lockedpool.cpp \
2828
bench/perf.cpp \
2929
bench/perf.h \
30-
bench/prevector_destructor.cpp
30+
bench/prevector.cpp
3131

3232
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_BENCH_FILES)
3333

src/bench/prevector.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Copyright (c) 2015-2017 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <compat.h>
6+
#include <prevector.h>
7+
8+
#include <bench/bench.h>
9+
10+
// Benchmark element type with a user-provided default constructor, which
// makes it non-trivially constructible. A default-constructed instance
// always has x == -1.
struct nontrivial_t {
    int x = -1;
    nontrivial_t() {}
};
14+
static_assert(!IS_TRIVIALLY_CONSTRUCTIBLE<nontrivial_t>::value,
15+
"expected nontrivial_t to not be trivially constructible");
16+
17+
typedef unsigned char trivial_t;
18+
static_assert(IS_TRIVIALLY_CONSTRUCTIBLE<trivial_t>::value,
19+
"expected trivial_t to be trivially constructible");
20+
21+
template <typename T>
22+
static void PrevectorDestructor(benchmark::State& state)
23+
{
24+
while (state.KeepRunning()) {
25+
for (auto x = 0; x < 1000; ++x) {
26+
prevector<28, T> t0;
27+
prevector<28, T> t1;
28+
t0.resize(28);
29+
t1.resize(29);
30+
}
31+
}
32+
}
33+
34+
template <typename T>
35+
static void PrevectorClear(benchmark::State& state)
36+
{
37+
38+
while (state.KeepRunning()) {
39+
for (auto x = 0; x < 1000; ++x) {
40+
prevector<28, T> t0;
41+
prevector<28, T> t1;
42+
t0.resize(28);
43+
t0.clear();
44+
t1.resize(29);
45+
t0.clear();
46+
}
47+
}
48+
}
49+
50+
template <typename T>
51+
void PrevectorResize(benchmark::State& state)
52+
{
53+
while (state.KeepRunning()) {
54+
prevector<28, T> t0;
55+
prevector<28, T> t1;
56+
for (auto x = 0; x < 1000; ++x) {
57+
t0.resize(28);
58+
t0.resize(0);
59+
t1.resize(29);
60+
t1.resize(0);
61+
}
62+
}
63+
}
64+
65+
#define PREVECTOR_TEST(name, nontrivops, trivops) \
66+
static void Prevector ## name ## Nontrivial(benchmark::State& state) { \
67+
PrevectorResize<nontrivial_t>(state); \
68+
} \
69+
BENCHMARK(Prevector ## name ## Nontrivial, nontrivops); \
70+
static void Prevector ## name ## Trivial(benchmark::State& state) { \
71+
PrevectorResize<trivial_t>(state); \
72+
} \
73+
BENCHMARK(Prevector ## name ## Trivial, trivops);
74+
75+
PREVECTOR_TEST(Clear, 28300, 88600)
76+
PREVECTOR_TEST(Destructor, 28800, 88900)
77+
PREVECTOR_TEST(Resize, 28900, 90300)

src/bench/prevector_destructor.cpp

Lines changed: 0 additions & 36 deletions
This file was deleted.

src/compat.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@
1010
#include <config/bitcoin-config.h>
1111
#endif
1212

13+
#include <type_traits>
14+
15+
// GCC 4.8 is missing some C++11 type_traits,
16+
// https://www.gnu.org/software/gcc/gcc-5/changes.html
17+
#if defined(__GNUC__) && __GNUC__ < 5
18+
#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivial
19+
#else
20+
#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivially_constructible
21+
#endif
22+
1323
#ifdef WIN32
1424
#ifdef _WIN32_WINNT
1525
#undef _WIN32_WINNT

src/prevector.h

Lines changed: 63 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,12 @@
1010
#include <stdint.h>
1111
#include <string.h>
1212

13+
#include <cstddef>
1314
#include <iterator>
1415
#include <type_traits>
1516

17+
#include <compat.h>
18+
1619
#pragma pack(push, 1)
1720
/** Implements a drop-in replacement for std::vector<T> which stores up to N
1821
* elements directly (without heap allocation). The types Size and Diff are
@@ -194,16 +197,42 @@ class prevector {
194197
T* item_ptr(difference_type pos) { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
195198
const T* item_ptr(difference_type pos) const { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
196199

200+
/**
 * Value-initialize `count` elements of type T in the storage starting at
 * `dst`. This helper does not touch _size; callers adjust it themselves.
 *
 * @param dst   Pointer to the first element slot to initialize.
 * @param count Number of consecutive slots to initialize.
 */
void fill(T* dst, ptrdiff_t count) {
    if (IS_TRIVIALLY_CONSTRUCTIBLE<T>::value) {
        // The most common use of prevector is where T=unsigned char. For
        // trivially constructible types, we can use memset() to avoid
        // looping.
        ::memset(dst, 0, count * sizeof(T));
    } else {
        // Use ptrdiff_t for the index so it has the same type as `count`;
        // `auto i = 0` would deduce int and be compared against a wider type.
        for (ptrdiff_t i = 0; i < count; ++i) {
            new(static_cast<void*>(dst + i)) T();
        }
    }
}
212+
213+
/**
 * Copy-construct `count` elements from `value` into the storage starting at
 * `dst`. This helper does not touch _size; callers adjust it themselves.
 *
 * @param dst   Pointer to the first element slot to construct.
 * @param count Number of consecutive slots to construct.
 * @param value Element that every new slot is copy-constructed from.
 */
void fill(T* dst, ptrdiff_t count, const T& value) {
    // Use ptrdiff_t for the index so it has the same type as `count`;
    // `auto i = 0` would deduce int and be compared against a wider type.
    for (ptrdiff_t i = 0; i < count; ++i) {
        new(static_cast<void*>(dst + i)) T(value);
    }
}
218+
219+
template<typename InputIterator>
220+
void fill(T* dst, InputIterator first, InputIterator last) {
221+
while (first != last) {
222+
new(static_cast<void*>(dst)) T(*first);
223+
++dst;
224+
++first;
225+
}
226+
}
227+
197228
public:
198229
void assign(size_type n, const T& val) {
199230
clear();
200231
if (capacity() < n) {
201232
change_capacity(n);
202233
}
203-
while (size() < n) {
204-
_size++;
205-
new(static_cast<void*>(item_ptr(size() - 1))) T(val);
206-
}
234+
_size += n;
235+
fill(item_ptr(0), n, val);
207236
}
208237

209238
template<typename InputIterator>
@@ -213,11 +242,8 @@ class prevector {
213242
if (capacity() < n) {
214243
change_capacity(n);
215244
}
216-
while (first != last) {
217-
_size++;
218-
new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
219-
++first;
220-
}
245+
_size += n;
246+
fill(item_ptr(0), first, last);
221247
}
222248

223249
prevector() : _size(0), _union{{}} {}
@@ -228,31 +254,23 @@ class prevector {
228254

229255
explicit prevector(size_type n, const T& val = T()) : _size(0) {
230256
change_capacity(n);
231-
while (size() < n) {
232-
_size++;
233-
new(static_cast<void*>(item_ptr(size() - 1))) T(val);
234-
}
257+
_size += n;
258+
fill(item_ptr(0), n, val);
235259
}
236260

237261
template<typename InputIterator>
238262
prevector(InputIterator first, InputIterator last) : _size(0) {
239263
size_type n = last - first;
240264
change_capacity(n);
241-
while (first != last) {
242-
_size++;
243-
new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
244-
++first;
245-
}
265+
_size += n;
266+
fill(item_ptr(0), first, last);
246267
}
247268

248269
prevector(const prevector<N, T, Size, Diff>& other) : _size(0) {
249-
change_capacity(other.size());
250-
const_iterator it = other.begin();
251-
while (it != other.end()) {
252-
_size++;
253-
new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
254-
++it;
255-
}
270+
size_type n = other.size();
271+
change_capacity(n);
272+
_size += n;
273+
fill(item_ptr(0), other.begin(), other.end());
256274
}
257275

258276
prevector(prevector<N, T, Size, Diff>&& other) : _size(0) {
@@ -263,14 +281,7 @@ class prevector {
263281
if (&other == this) {
264282
return *this;
265283
}
266-
resize(0);
267-
change_capacity(other.size());
268-
const_iterator it = other.begin();
269-
while (it != other.end()) {
270-
_size++;
271-
new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
272-
++it;
273-
}
284+
assign(other.begin(), other.end());
274285
return *this;
275286
}
276287

@@ -314,16 +325,20 @@ class prevector {
314325
}
315326

316327
void resize(size_type new_size) {
317-
if (size() > new_size) {
328+
size_type cur_size = size();
329+
if (cur_size == new_size) {
330+
return;
331+
}
332+
if (cur_size > new_size) {
318333
erase(item_ptr(new_size), end());
334+
return;
319335
}
320336
if (new_size > capacity()) {
321337
change_capacity(new_size);
322338
}
323-
while (size() < new_size) {
324-
_size++;
325-
new(static_cast<void*>(item_ptr(size() - 1))) T();
326-
}
339+
ptrdiff_t increase = new_size - cur_size;
340+
fill(item_ptr(cur_size), increase);
341+
_size += increase;
327342
}
328343

329344
void reserve(size_type new_capacity) {
@@ -346,10 +361,11 @@ class prevector {
346361
if (capacity() < new_size) {
347362
change_capacity(new_size + (new_size >> 1));
348363
}
349-
memmove(item_ptr(p + 1), item_ptr(p), (size() - p) * sizeof(T));
364+
T* ptr = item_ptr(p);
365+
memmove(ptr + 1, ptr, (size() - p) * sizeof(T));
350366
_size++;
351-
new(static_cast<void*>(item_ptr(p))) T(value);
352-
return iterator(item_ptr(p));
367+
new(static_cast<void*>(ptr)) T(value);
368+
return iterator(ptr);
353369
}
354370

355371
void insert(iterator pos, size_type count, const T& value) {
@@ -358,11 +374,10 @@ class prevector {
358374
if (capacity() < new_size) {
359375
change_capacity(new_size + (new_size >> 1));
360376
}
361-
memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
377+
T* ptr = item_ptr(p);
378+
memmove(ptr + count, ptr, (size() - p) * sizeof(T));
362379
_size += count;
363-
for (size_type i = 0; i < count; i++) {
364-
new(static_cast<void*>(item_ptr(p + i))) T(value);
365-
}
380+
fill(item_ptr(p), count, value);
366381
}
367382

368383
template<typename InputIterator>
@@ -373,13 +388,10 @@ class prevector {
373388
if (capacity() < new_size) {
374389
change_capacity(new_size + (new_size >> 1));
375390
}
376-
memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
391+
T* ptr = item_ptr(p);
392+
memmove(ptr + count, ptr, (size() - p) * sizeof(T));
377393
_size += count;
378-
while (first != last) {
379-
new(static_cast<void*>(item_ptr(p))) T(*first);
380-
++p;
381-
++first;
382-
}
394+
fill(ptr, first, last);
383395
}
384396

385397
iterator erase(iterator pos) {

0 commit comments

Comments
 (0)