7070 * - Additional benchmarking scenarios can be created by extending `benchmark_interface`.
7171 */
7272
73+ #include < list>
7374#include < malloc.h>
7475#include < random>
7576
@@ -86,6 +87,7 @@ struct alloc_data {
8687};
8788
// One pre-generated workload step, replayed by the benchmark loop.
// Members carry default initializers so a default-constructed record never
// holds indeterminate values; brace initialization `{alloc, offset, size}`
// keeps working as before.
struct next_alloc_data {
    // true if this step allocates into the slot, false if it frees the slot
    bool alloc = false;
    // index of the slot in the per-thread allocations vector
    size_t offset = 0;
    // size requested from the allocator for an allocation step;
    // free steps are generated with 0 here
    size_t size = 0;
};
@@ -288,18 +290,17 @@ template <
288290 typename =
289291 std::enable_if_t <std::is_base_of<allocator_interface, Alloc>::value>>
290292class multiple_malloc_free_benchmark : public benchmark_interface <Size, Alloc> {
291- using distribution = std::uniform_int_distribution< size_t >;
293+ protected:
292294 template <class T > using vector2d = std::vector<std::vector<T>>;
293295 using base = benchmark_interface<Size, Alloc>;
294-
295296 int allocsPerIterations = 10 ;
296297 bool thread_local_allocations = true ;
297298 size_t max_allocs = 0 ;
298299
299300 vector2d<alloc_data> allocations;
300301 vector2d<next_alloc_data> next;
301302 using next_alloc_data_iterator =
302- std::vector<next_alloc_data>::const_iterator;
303+ typename std::vector<next_alloc_data>::const_iterator;
303304 std::vector<std::unique_ptr<next_alloc_data_iterator>> next_iter;
304305 int64_t iterations;
305306
@@ -386,15 +387,20 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
386387 auto tid = state.thread_index ();
387388 auto &allocation = allocations[tid];
388389 auto &iter = next_iter[tid];
390+
389391 for (int i = 0 ; i < allocsPerIterations; i++) {
390392 auto &n = *(*iter)++;
391393 auto &alloc = allocation[n.offset ];
392- base::allocator.benchFree (alloc.ptr , alloc.size );
393- alloc.size = n.size ;
394- alloc.ptr = base::allocator.benchAlloc (alloc.size );
395-
396- if (alloc.ptr == NULL ) {
397- state.SkipWithError (" allocation failed" );
394+ if (n.alloc ) {
395+ alloc.ptr = base::allocator.benchAlloc (n.size );
396+ if (alloc.ptr == NULL ) {
397+ state.SkipWithError (" allocation failed" );
398+ }
399+ alloc.size = n.size ;
400+ } else {
401+ base::allocator.benchFree (alloc.ptr , alloc.size );
402+ alloc.ptr = NULL ;
403+ alloc.size = 0 ;
398404 }
399405 }
400406 }
@@ -412,13 +418,14 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
412418 }
413419
414420 private:
415- void prealloc (benchmark::State &state) {
421+ virtual void prealloc (benchmark::State &state) {
416422 auto tid = state.thread_index ();
417423 auto &i = allocations[tid];
418424 i.resize (max_allocs);
419425 auto sizeGenerator = base::alloc_sizes[tid];
420426
421- for (size_t j = 0 ; j < max_allocs; j++) {
427+ // Preallocate half of the available slots, for allocations
428+ for (size_t j = 0 ; j < max_allocs / 2 ; j++) {
422429 auto size = sizeGenerator.nextSize ();
423430 i[j].ptr = base::allocator.benchAlloc (size);
424431 if (i[j].ptr == NULL ) {
@@ -441,20 +448,164 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
441448 }
442449 }
443450
444- void prepareWorkload (benchmark::State &state) {
451+ virtual void prepareWorkload (benchmark::State &state) {
445452 auto tid = state.thread_index ();
446453 auto &n = next[tid];
454+
455+ // Create generators for random index selection and binary decision.
456+ using distribution = std::uniform_int_distribution<size_t >;
447457 std::default_random_engine generator;
448- distribution dist;
458+ distribution dist_offset (0 , max_allocs - 1 );
459+ distribution dist_opt_type (0 , 1 );
449460 generator.seed (0 );
450- dist. param ( distribution::param_type ( 0 , max_allocs - 1 ));
461+
451462 auto sizeGenerator = base::alloc_sizes[tid];
463+ std::vector<size_t > free;
464+ std::vector<size_t > allocated;
465+
466+ // Preallocate memory: initially, half the indices are allocated.
467+ // See prealloc() function;
468+ size_t i = 0 ;
469+ for (; i < max_allocs / 2 ; i++) {
470+ allocated.push_back (i);
471+ }
472+ // The remaining indices are marked as free.
473+ for (; i < max_allocs; i++) {
474+ free.push_back (i);
475+ }
452476
453477 n.clear ();
454478 for (int64_t j = 0 ; j < state.max_iterations * allocsPerIterations;
455479 j++) {
456- n.push_back ({dist (generator), sizeGenerator.nextSize ()});
480+ // Decide whether to allocate or free:
481+ // - If no allocations exist, allocation is forced.
482+ // - If there is maximum number of allocation, free is forced
483+ // - Otherwise, use a binary random choice (0 or 1)
484+ if (allocated.empty () ||
485+ (dist_opt_type (generator) == 0 && !free.empty ())) {
486+ // Allocation:
487+ std::swap (free[dist_offset (generator) % free.size ()],
488+ free.back ());
489+ auto offset = free.back ();
490+ free.pop_back ();
491+
492+ n.push_back ({true , offset, sizeGenerator.nextSize ()});
493+ allocated.push_back (offset);
494+ } else {
495+ // Free
496+ std::swap (allocated[dist_offset (generator) % allocated.size ()],
497+ allocated.back ());
498+ auto offset = allocated.back ();
499+ allocated.pop_back ();
500+
501+ n.push_back ({false , offset, 0 });
502+ free.push_back (offset);
503+ }
457504 }
505+
458506 next_iter[tid] = std::make_unique<next_alloc_data_iterator>(n.cbegin ());
459507 }
460508};
509+ // This class benchmarks performance by randomly allocating and freeing memory.
510+ // Initially, it slowly increases the memory footprint, and later decreases it."
511+ template <
512+ typename Size, typename Alloc,
513+ typename =
514+ std::enable_if_t <std::is_base_of<alloc_size_interface, Size>::value>,
515+ typename =
516+ std::enable_if_t <std::is_base_of<allocator_interface, Alloc>::value>>
517+ class peak_alloc_benchmark
518+ : public multiple_malloc_free_benchmark<Size, Alloc> {
519+ using base = multiple_malloc_free_benchmark<Size, Alloc>;
520+ virtual void prepareWorkload (benchmark::State &state) override {
521+ // Retrieve the thread index and corresponding operation buffer.
522+ auto tid = state.thread_index ();
523+ auto &n = this ->next [tid];
524+
525+ // Set up the random generators for index selection and decision making.
526+ std::default_random_engine generator;
527+ std::uniform_int_distribution<size_t > dist_offset (0 ,
528+ this ->max_allocs - 1 );
529+ std::uniform_real_distribution<double > dist_opt_type (0 , 1 );
530+ generator.seed (0 );
531+ auto sizeGenerator = this ->alloc_sizes [tid];
532+
533+ n.clear ();
534+ std::vector<size_t > free;
535+ std::vector<size_t > allocated;
536+
537+ // Initially, all indices are available.
538+ for (size_t i = 0 ; i < this ->max_allocs ; i++) {
539+ free.push_back (i);
540+ }
541+
542+ // Total number of allocation/free operations to simulate.
543+ int64_t iterations = state.max_iterations * this ->allocsPerIterations ;
544+ for (int64_t j = 0 ; j < iterations; j++) {
545+ int64_t target_allocation;
546+ int64_t max_allocs = static_cast <int64_t >(this ->max_allocs );
547+
548+ // Determine the target number of allocations based on the progress of the iterations.
549+ // In the first half of the iterations, the target allocation increases linearly.
550+ // In the second half, it decreases linearly.
551+ if (j < iterations / 2 ) {
552+ target_allocation = 2 * max_allocs * j / iterations;
553+ } else {
554+ target_allocation =
555+ -2 * max_allocs * j / iterations + 2 * max_allocs;
556+ }
557+
558+ // x represents the gap between the target and current allocations.
559+ auto x = static_cast <double >(target_allocation -
560+ static_cast <double >(allocated.size ()));
561+
562+ // Use a normal CDF with high sigma so that when x is positive,
563+ // we are slightly more likely to allocate,
564+ // and when x is negative, slightly more likely to free memory,
565+ // keeping the overall change gradual.
566+
567+ const double sigma = 1000 ;
568+ auto cdf = normalCDF (x, sigma);
569+
570+ // Decide whether to allocate or free:
571+ // - If no allocations exist, allocation is forced.
572+ // - If there is maximum number of allocation, free is forced
573+ // - Otherwise, Based on the computed probability, choose whether to allocate or free
574+ if (allocated.empty () ||
575+ (!free.empty () && cdf > dist_opt_type (generator))) {
576+ // Allocation
577+ std::swap (free[dist_offset (generator) % free.size ()],
578+ free.back ());
579+ auto offset = free.back ();
580+ free.pop_back ();
581+ n.push_back ({true , offset, sizeGenerator.nextSize ()});
582+ allocated.push_back (offset);
583+ } else {
584+ // Free
585+ std::swap (allocated[dist_offset (generator) % allocated.size ()],
586+ allocated.back ());
587+ auto offset = allocated.back ();
588+ allocated.pop_back ();
589+ n.push_back ({false , offset, 0 });
590+ free.push_back (offset);
591+ }
592+ }
593+
594+ this ->next_iter [tid] =
595+ std::make_unique<std::vector<next_alloc_data>::const_iterator>(
596+ n.cbegin ());
597+ }
598+
599+ virtual void prealloc (benchmark::State &state) {
600+ auto tid = state.thread_index ();
601+ auto &i = base::allocations[tid];
602+ i.resize (base::max_allocs);
603+ }
604+ virtual std::string name () { return base::base::name () + " /peak_alloc" ; }
605+
606+ private:
607+ // Function to calculate the CDF of a normal distribution
608+ double normalCDF (double x, double sigma = 1.0 , double mu = 0.0 ) {
609+ return 0.5 * (1 + std::erf ((x - mu) / (sigma * std::sqrt (2.0 ))));
610+ }
611+ };
0 commit comments