7070 * - Additional benchmarking scenarios can be created by extending `benchmark_interface`.
7171 */
7272
73+ #include < list>
7374#include < malloc.h>
7475#include < random>
7576
@@ -86,6 +87,7 @@ struct alloc_data {
8687};
8788
// One precomputed step of a benchmark workload script: either allocate
// into slot `offset` or free the pointer currently stored in that slot.
struct next_alloc_data {
    bool alloc; // true if allocation, false if deallocation
    size_t offset; // index of the slot in the per-thread allocations array
    size_t size; // requested allocation size; 0 for deallocation steps
};
@@ -288,18 +290,17 @@ template <
288290 typename =
289291 std::enable_if_t <std::is_base_of<allocator_interface, Alloc>::value>>
290292class multiple_malloc_free_benchmark : public benchmark_interface <Size, Alloc> {
291- using distribution = std::uniform_int_distribution< size_t >;
293+ protected:
292294 template <class T > using vector2d = std::vector<std::vector<T>>;
293295 using base = benchmark_interface<Size, Alloc>;
294-
295296 int allocsPerIterations = 10 ;
296297 bool thread_local_allocations = true ;
297298 size_t max_allocs = 0 ;
298299
299300 vector2d<alloc_data> allocations;
300301 vector2d<next_alloc_data> next;
301302 using next_alloc_data_iterator =
302- std::vector<next_alloc_data>::const_iterator;
303+ typename std::vector<next_alloc_data>::const_iterator;
303304 std::vector<std::unique_ptr<next_alloc_data_iterator>> next_iter;
304305 int64_t iterations;
305306
@@ -386,15 +387,20 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
386387 auto tid = state.thread_index ();
387388 auto &allocation = allocations[tid];
388389 auto &iter = next_iter[tid];
390+
389391 for (int i = 0 ; i < allocsPerIterations; i++) {
390392 auto &n = *(*iter)++;
391393 auto &alloc = allocation[n.offset ];
392- base::allocator.benchFree (alloc.ptr , alloc.size );
393- alloc.size = n.size ;
394- alloc.ptr = base::allocator.benchAlloc (alloc.size );
395-
396- if (alloc.ptr == NULL ) {
397- state.SkipWithError (" allocation failed" );
394+ if (n.alloc ) {
395+ alloc.ptr = base::allocator.benchAlloc (n.size );
396+ if (alloc.ptr == NULL ) {
397+ state.SkipWithError (" allocation failed" );
398+ }
399+ alloc.size = n.size ;
400+ } else {
401+ base::allocator.benchFree (alloc.ptr , alloc.size );
402+ alloc.ptr = NULL ;
403+ alloc.size = 0 ;
398404 }
399405 }
400406 }
@@ -412,13 +418,13 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
412418 }
413419
414420 private:
415- void prealloc (benchmark::State &state) {
421+ virtual void prealloc (benchmark::State &state) {
416422 auto tid = state.thread_index ();
417423 auto &i = allocations[tid];
418424 i.resize (max_allocs);
419425 auto sizeGenerator = base::alloc_sizes[tid];
420426
421- for (size_t j = 0 ; j < max_allocs; j++) {
427+ for (size_t j = 0 ; j < max_allocs / 2 ; j++) {
422428 auto size = sizeGenerator.nextSize ();
423429 i[j].ptr = base::allocator.benchAlloc (size);
424430 if (i[j].ptr == NULL ) {
@@ -441,20 +447,141 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
441447 }
442448 }
443449
444- void prepareWorkload (benchmark::State &state) {
450+ virtual void prepareWorkload (benchmark::State &state) {
445451 auto tid = state.thread_index ();
446452 auto &n = next[tid];
453+ using distribution = std::uniform_int_distribution<size_t >;
447454 std::default_random_engine generator;
448- distribution dist;
455+ distribution dist_offset (0 , max_allocs - 1 );
456+ distribution dist_opt_type (0 , 1 );
449457 generator.seed (0 );
450- dist.param (distribution::param_type (0 , max_allocs - 1 ));
451458 auto sizeGenerator = base::alloc_sizes[tid];
459+ std::vector<size_t > free;
460+ std::vector<size_t > allocated;
461+
462+ // this benchmark prealloc memory, so we start with some allocation
463+ size_t i = 0 ;
464+ for (; i < max_allocs / 2 ; i++) {
465+ allocated.push_back (i);
466+ }
467+ for (; i < max_allocs; i++) {
468+ free.push_back (i);
469+ }
452470
453471 n.clear ();
454472 for (int64_t j = 0 ; j < state.max_iterations * allocsPerIterations;
455473 j++) {
456- n.push_back ({dist (generator), sizeGenerator.nextSize ()});
474+ if (allocated.empty () ||
475+ (dist_opt_type (generator) == 0 && !free.empty ())) {
476+
477+ std::swap (free[dist_offset (generator) % free.size ()],
478+ free.back ());
479+ auto offset = free.back ();
480+ free.pop_back ();
481+
482+ n.push_back ({true , offset, sizeGenerator.nextSize ()});
483+ allocated.push_back (offset);
484+ } else {
485+ std::swap (allocated[dist_offset (generator) % allocated.size ()],
486+ allocated.back ());
487+ auto offset = allocated.back ();
488+ allocated.pop_back ();
489+
490+ n.push_back ({false , offset, 0 });
491+ free.push_back (offset);
492+ }
457493 }
494+
458495 next_iter[tid] = std::make_unique<next_alloc_data_iterator>(n.cbegin ());
459496 }
460497};
498+
499+ // This class benchmarks performance randomly allocates and frees,
500+ // Firstly slowly increasing memory footprint, and later decreasing
501+ template <
502+ typename Size, typename Alloc,
503+ typename =
504+ std::enable_if_t <std::is_base_of<alloc_size_interface, Size>::value>,
505+ typename =
506+ std::enable_if_t <std::is_base_of<allocator_interface, Alloc>::value>>
507+ class peak_alloc_benchmark
508+ : public multiple_malloc_free_benchmark<Size, Alloc> {
509+ using base = multiple_malloc_free_benchmark<Size, Alloc>;
510+ virtual void prepareWorkload (benchmark::State &state) override {
511+
512+ auto tid = state.thread_index ();
513+ auto &n = this ->next [tid];
514+ std::default_random_engine generator;
515+ std::uniform_int_distribution<size_t > dist_offset (0 ,
516+ this ->max_allocs - 1 );
517+ std::uniform_real_distribution<double > dist_opt_type (0 , 1 );
518+ generator.seed (0 );
519+ auto sizeGenerator = this ->alloc_sizes [tid];
520+
521+ n.clear ();
522+ std::vector<size_t > free;
523+ std::vector<size_t > allocated;
524+ // we start without any allocations
525+ for (size_t i = 0 ; i < this ->max_allocs ; i++) {
526+ free.push_back (i);
527+ }
528+
529+ int64_t iterations = state.max_iterations * this ->allocsPerIterations ;
530+ for (int64_t j = 0 ; j < iterations; j++) {
531+ int64_t target_allocation;
532+ int64_t max_allocs = static_cast <int64_t >(this ->max_allocs );
533+ if (j < iterations / 2 ) {
534+ target_allocation = 2 * max_allocs * j / iterations;
535+ } else {
536+ target_allocation =
537+ -2 * max_allocs * j / iterations + 2 * max_allocs;
538+ }
539+
540+ auto x = static_cast <double >(target_allocation -
541+ static_cast <double >(allocated.size ()));
542+ // high sigma value cause small changes in probability between alloc and free
543+ // based on offset x from target_allocation number.
544+ // sigma == 1000 causes that actual number of allocation are +/-40 of target number
545+ const double sigma = 1000 ;
546+ auto cdf = normalCDF (x, sigma);
547+
548+ if (allocated.empty () ||
549+ (!free.empty () && cdf > dist_opt_type (generator))) {
550+ // allocate
551+ std::swap (free[dist_offset (generator) % free.size ()],
552+ free.back ());
553+ auto offset = free.back ();
554+ free.pop_back ();
555+ n.push_back ({true , offset, sizeGenerator.nextSize ()});
556+ allocated.push_back (offset);
557+
558+ } else {
559+ // free
560+ std::swap (allocated[dist_offset (generator) % allocated.size ()],
561+ allocated.back ());
562+ auto offset = allocated.back ();
563+ allocated.pop_back ();
564+
565+ n.push_back ({false , offset, 0 });
566+ free.push_back (offset);
567+ }
568+ }
569+
570+ this ->next_iter [tid] =
571+ std::make_unique<std::vector<next_alloc_data>::const_iterator>(
572+ n.cbegin ());
573+ }
574+
575+ virtual void prealloc (benchmark::State &state) {
576+ auto tid = state.thread_index ();
577+ auto &i = base::allocations[tid];
578+ i.resize (base::max_allocs);
579+ }
580+ virtual std::string name () { return base::base::name () + " /peak_alloc" ; }
581+
582+ private:
583+ // Function to calculate the CDF of a normal distribution
584+ double normalCDF (double x, double sigma = 1.0 , double mu = 0.0 ) {
585+ return 0.5 * (1 + std::erf ((x - mu) / (sigma * std::sqrt (2.0 ))));
586+ }
587+ };
0 commit comments