Skip to content

Commit c831f6e

Browse files
committed
add memusage stat to os_provider and use it in benchmarks
1 parent dcd7c9c commit c831f6e

File tree

7 files changed

+222
-25
lines changed

7 files changed

+222
-25
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ if(PROJECT_VERSION_PATCH GREATER 0)
3838
set(UMF_VERSION_BUGFIX 1)
3939
endif()
4040

41+
set(CMAKE_C_STANDARD 11)
4142
include(CTest)
4243
include(CMakePackageConfigHelpers)
4344
include(GNUInstallDirs)

benchmark/benchmark.hpp

Lines changed: 75 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,14 @@ class provider_allocator : public allocator_interface {
173173
return argPos;
174174
}
175175

176+
void preBench(::benchmark::State &state) override {
177+
provider.preBench(state);
178+
}
179+
180+
void postBench(::benchmark::State &state) override {
181+
provider.postBench(state);
182+
}
183+
176184
void TearDown(::benchmark::State &state) override {
177185
provider.TearDown(state);
178186
}
@@ -204,13 +212,18 @@ template <typename Pool> class pool_allocator : public allocator_interface {
204212
return argPos;
205213
}
206214

215+
void preBench(::benchmark::State &state) override { pool.preBench(state); }
216+
void postBench(::benchmark::State &state) override {
217+
pool.postBench(state);
218+
}
219+
207220
void TearDown(::benchmark::State &state) override { pool.TearDown(state); }
208221

209-
virtual void *benchAlloc(size_t size) override {
222+
void *benchAlloc(size_t size) override {
210223
return umfPoolMalloc(pool.pool, size);
211224
}
212225

213-
virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override {
226+
void benchFree(void *ptr, [[maybe_unused]] size_t size) override {
214227
umfPoolFree(pool.pool, ptr);
215228
}
216229

@@ -241,7 +254,7 @@ struct benchmark_interface : public benchmark::Fixture {
241254
allocator.TearDown(state);
242255
}
243256

244-
virtual void bench(::benchmark::State &state) = 0;
257+
void bench([[maybe_unused]] ::benchmark::State &state){};
245258

246259
virtual std::vector<std::string> argsName() {
247260
auto s = Size::argsName();
@@ -260,6 +273,9 @@ struct benchmark_interface : public benchmark::Fixture {
260273
benchmark->ArgNames(bench->argsName())->Name(bench->name());
261274
}
262275

276+
void custom_counters(::benchmark::State &state) {
277+
allocator.custom_counters(state);
278+
}
263279
std::vector<Size> alloc_sizes;
264280
Allocator allocator;
265281
};
@@ -282,7 +298,8 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
282298

283299
vector2d<alloc_data> allocations;
284300
std::vector<unsigned> iters;
285-
301+
std::vector<size_t> memused;
302+
std::vector<size_t> peakmemory;
286303
vector2d<next_alloc_data> next;
287304
std::vector<std::vector<next_alloc_data>::const_iterator> next_iter;
288305
int64_t iterations;
@@ -302,6 +319,12 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
302319
allocations.resize(state.threads());
303320
next.resize(state.threads());
304321
next_iter.resize(state.threads());
322+
memused.resize(state.threads());
323+
peakmemory.resize(state.threads());
324+
for (int i = 0; i < state.threads(); i++) {
325+
memused[i] = 0;
326+
peakmemory[i] = 0;
327+
}
305328

306329
#ifndef WIN32
307330
// Ensure that system malloc does not have memory pooled on the heap
@@ -323,13 +346,49 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
323346
waitForAllThreads(state);
324347
// prepare workload for actual benchmark.
325348
freeAllocs(state);
349+
// reset peak memory before real benchmark
350+
for (auto &i : peakmemory) {
351+
i = 0;
352+
}
353+
326354
prealloc(state);
327355
prepareWorkload(state);
356+
waitForAllThreads(state);
357+
base::allocator.preBench(state);
328358
}
329359

330360
void TearDown(::benchmark::State &state) override {
361+
base::allocator.postBench(state);
331362
auto tid = state.thread_index();
363+
if (tid == 0) {
364+
size_t current_memory_allocated = 0;
365+
for (int i = 0; i < state.threads(); i++) {
366+
current_memory_allocated += memused[i];
367+
}
368+
size_t peak_memory_allocated =
369+
*std::max_element(peakmemory.begin(), peakmemory.end());
370+
371+
size_t peak_memory_used = state.counters["peak_memory_use"];
372+
size_t current_memory_used = state.counters["current_memory_use"];
373+
374+
if (peak_memory_used != 0) {
375+
state.counters["peak_memory_fragmentation"] =
376+
100.0 * (peak_memory_used - peak_memory_allocated) /
377+
peak_memory_used;
378+
}
379+
380+
if (current_memory_used != 0) {
381+
state.counters["current_memory_fragmentation"] =
382+
100.0 * (current_memory_used - current_memory_allocated) /
383+
current_memory_used;
384+
}
385+
386+
state.counters["current_memory_allocated"] =
387+
current_memory_allocated;
388+
state.counters["peak_memory_allocated"] = peak_memory_allocated;
389+
}
332390

391+
waitForAllThreads(state);
333392
freeAllocs(state);
334393
waitForAllThreads(state);
335394
if (tid == 0) {
@@ -342,20 +401,24 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
342401
base::TearDown(state);
343402
}
344403

345-
void bench(benchmark::State &state) override {
404+
// Runs one benchmark iteration for the calling thread: performs
// allocsPerIterations free+alloc pairs on the slots selected by this
// thread's prepared workload (next_iter), while keeping this thread's
// entries in memused / peakmemory up to date.
//
// On allocation failure the benchmark is marked as skipped and the
// iteration stops immediately; the failed request is NOT counted as
// allocated memory.
void bench(benchmark::State &state) {
    auto tid = state.thread_index();
    auto &allocation = allocations[tid];
    auto &memuse = memused[tid];
    auto &peak = peakmemory[tid];
    for (int i = 0; i < allocsPerIterations; i++) {
        auto &n = *next_iter[tid]++;
        auto &alloc = allocation[n.offset];

        // Release the slot's previous allocation and drop it from the
        // running byte count before the slot is reused.
        base::allocator.benchFree(alloc.ptr, alloc.size);
        memuse -= alloc.size;

        alloc.size = n.size;
        alloc.ptr = base::allocator.benchAlloc(alloc.size);

        if (alloc.ptr == NULL) {
            // Keep the slot consistent (no pointer => no bytes) so that
            // later accounting never subtracts or adds bytes for memory
            // that was never obtained.
            alloc.size = 0;
            state.SkipWithError("allocation failed");
            return;
        }

        memuse += alloc.size;
        peak = std::max(peak, memuse);
    }
}
361424

@@ -376,7 +439,10 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
376439
auto tid = state.thread_index();
377440
auto &i = allocations[tid];
378441
i.resize(max_allocs);
442+
auto &memuse = memused[tid];
443+
auto &peak = peakmemory[tid];
379444
auto sizeGenerator = base::alloc_sizes[tid];
445+
380446
for (size_t j = 0; j < max_allocs; j++) {
381447
auto size = sizeGenerator.nextSize();
382448
i[j].ptr = base::allocator.benchAlloc(size);
@@ -385,6 +451,8 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
385451
return;
386452
}
387453
i[j].size = size;
454+
memuse += size;
455+
peak = std::max(peak, memuse);
388456
}
389457
}
390458

@@ -394,6 +462,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
394462
for (auto &j : i) {
395463
if (j.ptr != NULL) {
396464
base::allocator.benchFree(j.ptr, j.size);
465+
memused[tid] -= j.size;
397466
j.ptr = NULL;
398467
j.size = 0;
399468
}

benchmark/benchmark_umf.hpp

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ struct provider_interface {
2929
using params_ptr = std::unique_ptr<void, void (*)(void *)>;
3030

3131
umf_memory_provider_handle_t provider = NULL;
32-
virtual void SetUp(::benchmark::State &state) {
32+
void SetUp(::benchmark::State &state) {
3333
if (state.thread_index() != 0) {
3434
return;
3535
}
@@ -41,7 +41,26 @@ struct provider_interface {
4141
}
4242
}
4343

44-
virtual void TearDown([[maybe_unused]] ::benchmark::State &state) {
44+
void preBench([[maybe_unused]] ::benchmark::State &state) {
45+
if (state.thread_index() != 0) {
46+
return;
47+
}
48+
umfCtlExec("umf.provider.by_handle.stats.reset", provider, NULL);
49+
}
50+
51+
// Publishes the provider's memory statistics as benchmark counters
// ("peak_memory_use" and "current_memory_use"). Only thread 0 queries the
// provider; all other threads return immediately.
void postBench([[maybe_unused]] ::benchmark::State &state) {
    if (state.thread_index() != 0) {
        return;
    }

    // Each statistic gets its own zero-initialized variable: if a query
    // does not write its output, the counter reads 0 instead of an
    // indeterminate value or the value left over from the other query.
    size_t peak_memory = 0;
    umfCtlGet("umf.provider.by_handle.stats.peak_memory", provider,
              &peak_memory);
    state.counters["peak_memory_use"] = peak_memory;

    size_t allocated_memory = 0;
    umfCtlGet("umf.provider.by_handle.stats.allocated_memory", provider,
              &allocated_memory);
    state.counters["current_memory_use"] = allocated_memory;
}
62+
63+
void TearDown([[maybe_unused]] ::benchmark::State &state) {
4564
if (state.thread_index() != 0) {
4665
return;
4766
}
@@ -52,9 +71,7 @@ struct provider_interface {
5271
}
5372

5473
virtual umf_memory_provider_ops_t *
55-
getOps([[maybe_unused]] ::benchmark::State &state) {
56-
return nullptr;
57-
}
74+
getOps([[maybe_unused]] ::benchmark::State &state) = 0;
5875

5976
virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) {
6077
return {nullptr, [](void *) {}};
@@ -67,7 +84,7 @@ template <typename T,
6784
struct pool_interface {
6885
using params_ptr = std::unique_ptr<void, void (*)(void *)>;
6986

70-
virtual void SetUp(::benchmark::State &state) {
87+
void SetUp(::benchmark::State &state) {
7188
provider.SetUp(state);
7289
if (state.thread_index() != 0) {
7390
return;
@@ -79,7 +96,22 @@ struct pool_interface {
7996
state.SkipWithError("umfPoolCreate() failed");
8097
}
8198
}
82-
virtual void TearDown([[maybe_unused]] ::benchmark::State &state) {
99+
100+
void preBench([[maybe_unused]] ::benchmark::State &state) {
101+
provider.preBench(state);
102+
if (state.thread_index() != 0) {
103+
return;
104+
}
105+
}
106+
107+
void postBench([[maybe_unused]] ::benchmark::State &state) {
108+
provider.postBench(state);
109+
if (state.thread_index() != 0) {
110+
return;
111+
}
112+
}
113+
114+
void TearDown([[maybe_unused]] ::benchmark::State &state) {
83115
if (state.thread_index() != 0) {
84116
return;
85117
}
@@ -92,15 +124,17 @@ struct pool_interface {
92124
if (pool) {
93125
umfPoolDestroy(pool);
94126
}
127+
128+
provider.TearDown(state);
95129
};
96130

97131
virtual umf_memory_pool_ops_t *
98-
getOps([[maybe_unused]] ::benchmark::State &state) {
99-
return nullptr;
100-
}
132+
getOps([[maybe_unused]] ::benchmark::State &state) = 0;
133+
101134
virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) {
102135
return {nullptr, [](void *) {}};
103136
}
137+
104138
T provider;
105139
umf_memory_pool_handle_t pool;
106140
};
@@ -109,6 +143,8 @@ class allocator_interface {
109143
public:
110144
virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state,
111145
[[maybe_unused]] unsigned argPos) = 0;
146+
virtual void preBench([[maybe_unused]] ::benchmark::State &state) = 0;
147+
virtual void postBench([[maybe_unused]] ::benchmark::State &state) = 0;
112148
virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0;
113149
virtual void *benchAlloc(size_t size) = 0;
114150
virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) = 0;
@@ -120,7 +156,9 @@ struct glibc_malloc : public allocator_interface {
120156
unsigned argPos) override {
121157
return argPos;
122158
}
123-
void TearDown([[maybe_unused]] ::benchmark::State &state) override{};
159+
void preBench([[maybe_unused]] ::benchmark::State &state) override {}
160+
void postBench([[maybe_unused]] ::benchmark::State &state) override {}
161+
void TearDown([[maybe_unused]] ::benchmark::State &state) override {}
124162
void *benchAlloc(size_t size) override { return malloc(size); }
125163
void benchFree(void *ptr, [[maybe_unused]] size_t size) override {
126164
free(ptr);
@@ -236,7 +274,7 @@ struct jemalloc_pool : public pool_interface<Provider> {
236274
#ifdef UMF_POOL_SCALABLE_ENABLED
237275
template <typename Provider>
238276
struct scalable_pool : public pool_interface<Provider> {
239-
virtual umf_memory_pool_ops_t *
277+
umf_memory_pool_ops_t *
240278
getOps([[maybe_unused]] ::benchmark::State &state) override {
241279
return umfScalablePoolOps();
242280
}

cmake/helpers.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ function(add_umf_target_compile_options name)
270270
/W4
271271
/Gy
272272
/GS
273+
/experimental:c11atomics
273274
# disable warning 6326: Potential comparison of a constant
274275
# with another constant
275276
/wd6326

0 commit comments

Comments
 (0)