  * - Additional benchmarking scenarios can be created by extending `benchmark_interface`.
  */

+#include <list>
 #include <malloc.h>
 #include <random>

@@ -86,6 +87,7 @@ struct alloc_data {
 };

 struct next_alloc_data {
+    bool alloc; // true if allocation, false if deallocation
     size_t offset;
     size_t size;
 };
@@ -288,18 +290,17 @@ template <
     typename =
         std::enable_if_t<std::is_base_of<allocator_interface, Alloc>::value>>
 class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
-    using distribution = std::uniform_int_distribution<size_t>;
+  protected:
     template <class T> using vector2d = std::vector<std::vector<T>>;
     using base = benchmark_interface<Size, Alloc>;
-
     int allocsPerIterations = 10;
     bool thread_local_allocations = true;
     size_t max_allocs = 0;

     vector2d<alloc_data> allocations;
     vector2d<next_alloc_data> next;
     using next_alloc_data_iterator =
-        std::vector<next_alloc_data>::const_iterator;
+        typename std::vector<next_alloc_data>::const_iterator;
     std::vector<std::unique_ptr<next_alloc_data_iterator>> next_iter;
     int64_t iterations;

@@ -386,15 +387,20 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
         auto tid = state.thread_index();
         auto &allocation = allocations[tid];
         auto &iter = next_iter[tid];
+
         for (int i = 0; i < allocsPerIterations; i++) {
             auto &n = *(*iter)++;
             auto &alloc = allocation[n.offset];
-            base::allocator.benchFree(alloc.ptr, alloc.size);
-            alloc.size = n.size;
-            alloc.ptr = base::allocator.benchAlloc(alloc.size);
-
-            if (alloc.ptr == NULL) {
-                state.SkipWithError("allocation failed");
+            if (n.alloc) {
+                alloc.ptr = base::allocator.benchAlloc(n.size);
+                if (alloc.ptr == NULL) {
+                    state.SkipWithError("allocation failed");
+                }
+                alloc.size = n.size;
+            } else {
+                base::allocator.benchFree(alloc.ptr, alloc.size);
+                alloc.ptr = NULL;
+                alloc.size = 0;
             }
         }
     }
@@ -412,13 +418,14 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
     }

   private:
-    void prealloc(benchmark::State &state) {
+    virtual void prealloc(benchmark::State &state) {
         auto tid = state.thread_index();
         auto &i = allocations[tid];
         i.resize(max_allocs);
         auto sizeGenerator = base::alloc_sizes[tid];

-        for (size_t j = 0; j < max_allocs; j++) {
+        // Preallocate half of the available slots for allocations.
+        for (size_t j = 0; j < max_allocs / 2; j++) {
             auto size = sizeGenerator.nextSize();
             i[j].ptr = base::allocator.benchAlloc(size);
             if (i[j].ptr == NULL) {
@@ -441,20 +448,165 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
         }
     }

-    void prepareWorkload(benchmark::State &state) {
+    virtual void prepareWorkload(benchmark::State &state) {
         auto tid = state.thread_index();
         auto &n = next[tid];
+
+        // Create generators for random index selection and binary decisions.
+        using distribution = std::uniform_int_distribution<size_t>;
         std::default_random_engine generator;
-        distribution dist;
+        distribution dist_offset(0, max_allocs - 1);
+        distribution dist_opt_type(0, 1);
         generator.seed(0);
-        dist.param(distribution::param_type(0, max_allocs - 1));
+
         auto sizeGenerator = base::alloc_sizes[tid];
+        std::vector<size_t> free;
+        std::vector<size_t> allocated;
+        free.reserve(max_allocs / 2);
+        allocated.reserve(max_allocs / 2);
+        // Preallocate memory: initially, half of the indices are allocated.
+        // See the prealloc() function.
+        size_t i = 0;
+        while (i < max_allocs / 2) {
+            allocated.push_back(i++);
+        }
+        // The remaining indices are marked as free.
+        while (i < max_allocs) {
+            free.push_back(i++);
+        }

         n.clear();
         for (int64_t j = 0; j < state.max_iterations * allocsPerIterations;
              j++) {
-            n.push_back({dist(generator), sizeGenerator.nextSize()});
+            // Decide whether to allocate or free:
+            // - If no allocations exist, an allocation is forced.
+            // - If the maximum number of allocations is reached, a free is forced.
+            // - Otherwise, use a binary random choice (0 or 1).
+            if (allocated.empty() ||
+                (dist_opt_type(generator) == 0 && !free.empty())) {
+                // Allocation
+                std::swap(free[dist_offset(generator) % free.size()],
+                          free.back());
+                auto offset = free.back();
+                free.pop_back();
+
+                n.push_back({true, offset, sizeGenerator.nextSize()});
+                allocated.push_back(offset);
+            } else {
+                // Free
+                std::swap(allocated[dist_offset(generator) % allocated.size()],
+                          allocated.back());
+                auto offset = allocated.back();
+                allocated.pop_back();
+
+                n.push_back({false, offset, 0});
+                free.push_back(offset);
+            }
         }
+
         next_iter[tid] = std::make_unique<next_alloc_data_iterator>(n.cbegin());
     }
 };
+// This class benchmarks performance by randomly allocating and freeing memory.
+// Initially, it slowly increases the memory footprint, and later decreases it.
+template <
+    typename Size, typename Alloc,
+    typename =
+        std::enable_if_t<std::is_base_of<alloc_size_interface, Size>::value>,
+    typename =
+        std::enable_if_t<std::is_base_of<allocator_interface, Alloc>::value>>
+class peak_alloc_benchmark
+    : public multiple_malloc_free_benchmark<Size, Alloc> {
+    using base = multiple_malloc_free_benchmark<Size, Alloc>;
+    virtual void prepareWorkload(benchmark::State &state) override {
+        // Retrieve the thread index and the corresponding operation buffer.
+        auto tid = state.thread_index();
+        auto &n = this->next[tid];
+
+        // Set up the random generators for index selection and decision making.
+        std::default_random_engine generator;
+        std::uniform_int_distribution<size_t> dist_offset(0,
+                                                          this->max_allocs - 1);
+        std::uniform_real_distribution<double> dist_opt_type(0, 1);
+        generator.seed(0);
+        auto sizeGenerator = this->alloc_sizes[tid];
+
+        n.clear();
+        std::vector<size_t> free;
+        std::vector<size_t> allocated;
+        free.reserve(this->max_allocs);
+        // Initially, all indices are available.
+        for (size_t i = 0; i < this->max_allocs; i++) {
+            free.push_back(i);
+        }
+
+        // Total number of allocation/free operations to simulate.
+        int64_t iters = state.max_iterations * this->allocsPerIterations;
+        for (int64_t j = 0; j < iters; j++) {
+            int64_t target_allocation;
+            int64_t max_allocs = static_cast<int64_t>(this->max_allocs);
+
+            // Determine the target number of allocations based on the progress
+            // of the iterations: in the first half the target increases
+            // linearly, and in the second half it decreases linearly.
+            if (j < iters / 2) {
+                target_allocation = 2 * max_allocs * j / iters;
+            } else {
+                target_allocation =
+                    -2 * max_allocs * j / iters + 2 * max_allocs;
+            }
+
+            // x is the gap between the target and the current number of allocations.
+            auto x = static_cast<double>(target_allocation -
+                                         static_cast<double>(allocated.size()));
+
+            // Use a normal CDF with a high sigma so that when x is positive
+            // we are slightly more likely to allocate, and when x is negative
+            // we are slightly more likely to free memory, keeping the overall
+            // change gradual.
+
+            const double sigma = 1000;
+            auto cdf = normalCDF(x, sigma);
+
+            // Decide whether to allocate or free:
+            // - If no allocations exist, an allocation is forced.
+            // - If the maximum number of allocations is reached, a free is forced.
+            // - Otherwise, choose based on the computed probability.
+            if (allocated.empty() ||
+                (!free.empty() && cdf > dist_opt_type(generator))) {
+                // Allocation
+                std::swap(free[dist_offset(generator) % free.size()],
+                          free.back());
+                auto offset = free.back();
+                free.pop_back();
+                n.push_back({true, offset, sizeGenerator.nextSize()});
+                allocated.push_back(offset);
+            } else {
+                // Free
+                std::swap(allocated[dist_offset(generator) % allocated.size()],
+                          allocated.back());
+                auto offset = allocated.back();
+                allocated.pop_back();
+                n.push_back({false, offset, 0});
+                free.push_back(offset);
+            }
+        }
+
+        this->next_iter[tid] =
+            std::make_unique<std::vector<next_alloc_data>::const_iterator>(
+                n.cbegin());
+    }
+
+    virtual void prealloc(benchmark::State &state) {
+        auto tid = state.thread_index();
+        auto &i = base::allocations[tid];
+        i.resize(base::max_allocs);
+    }
+    virtual std::string name() { return base::base::name() + "/peak_alloc"; }
+
+  private:
+    // Calculate the CDF of a normal distribution.
+    double normalCDF(double x, double sigma = 1.0, double mu = 0.0) {
+        return 0.5 * (1 + std::erf((x - mu) / (sigma * std::sqrt(2.0))));
+    }
+};
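
The workload generators in this patch can interleave allocations and frees in any order without ever freeing an empty slot or allocating into an occupied one, because they keep slot indices in two pools ("free" and "allocated") and move an index between the pools with a swap-and-pop each time an operation is recorded. Below is a minimal standalone sketch of that bookkeeping; it is not part of the patch, and the names (op, free_slots, used_slots, trace) and constants are illustrative only.

// Sketch: generate a random but always-valid alloc/free trace over a fixed
// number of slots, using the same swap-and-pop index pools as prepareWorkload().
#include <cstddef>
#include <cstdio>
#include <random>
#include <utility>
#include <vector>

struct op {
    bool alloc;    // true = allocate into offset, false = free it
    size_t offset; // slot index
    size_t size;   // requested size (0 for frees)
};

int main() {
    const size_t max_allocs = 8;
    const size_t n_ops = 32;

    std::default_random_engine gen(0);
    std::uniform_int_distribution<size_t> pick(0, max_allocs - 1);
    std::uniform_int_distribution<size_t> coin(0, 1);
    std::uniform_int_distribution<size_t> size_dist(8, 64);

    // A slot index lives in exactly one of the two pools at any time.
    std::vector<size_t> free_slots, used_slots;
    for (size_t i = 0; i < max_allocs; i++) {
        free_slots.push_back(i);
    }

    std::vector<op> trace;
    for (size_t j = 0; j < n_ops; j++) {
        // Forced alloc when nothing is live, forced free when every slot is
        // in use, otherwise a fair coin flip.
        bool do_alloc =
            used_slots.empty() || (coin(gen) == 0 && !free_slots.empty());
        auto &from = do_alloc ? free_slots : used_slots;
        auto &to = do_alloc ? used_slots : free_slots;

        // Swap-and-pop: O(1) removal of a uniformly chosen index.
        std::swap(from[pick(gen) % from.size()], from.back());
        size_t offset = from.back();
        from.pop_back();
        to.push_back(offset);

        trace.push_back({do_alloc, offset, do_alloc ? size_dist(gen) : 0});
    }

    for (const auto &o : trace) {
        std::printf("%s slot %zu size %zu\n", o.alloc ? "alloc" : "free ",
                    o.offset, o.size);
    }
    return 0;
}

Precomputing the trace this way means the timed loop only replays decisions that were already made, rather than doing the random-number work while being measured.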
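In peak_alloc_benchmark the shape of the footprint comes from two ingredients: a triangular target that rises linearly to max_allocs over the first half of the run and falls back toward zero in the second half, and a normal CDF with a large sigma that turns the gap between the target and the current number of live allocations into a gently biased allocate/free probability. The standalone sketch below is not part of the patch and uses illustrative constants (iters, max_allocs, sigma); it simulates only that decision logic and prints how the live count tracks the triangular target.

// Sketch: simulate the peak workload's allocate/free decisions without any
// real allocations, to show the footprint ramping up and then down.
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <random>

// Same formula as normalCDF() in the patch.
static double normal_cdf(double x, double sigma = 1.0, double mu = 0.0) {
    return 0.5 * (1 + std::erf((x - mu) / (sigma * std::sqrt(2.0))));
}

int main() {
    const int64_t iters = 100000;     // total simulated operations
    const int64_t max_allocs = 10000; // slot count
    const double sigma = 1000;        // large sigma keeps the bias gentle

    std::default_random_engine gen(0);
    std::uniform_real_distribution<double> coin(0, 1);

    int64_t live = 0; // current number of "allocated" slots
    for (int64_t j = 0; j < iters; j++) {
        // Triangular target: peaks at max_allocs halfway through the run.
        int64_t target = (j < iters / 2)
                             ? 2 * max_allocs * j / iters
                             : -2 * max_allocs * j / iters + 2 * max_allocs;

        // Positive gap -> P(alloc) slightly above 0.5, negative -> below.
        double p_alloc = normal_cdf(static_cast<double>(target - live), sigma);
        bool do_alloc = live == 0 || (live < max_allocs && p_alloc > coin(gen));
        live += do_alloc ? 1 : -1;

        if (j % (iters / 10) == 0) {
            std::printf("iter %7lld  target %6lld  live %6lld\n",
                        static_cast<long long>(j),
                        static_cast<long long>(target),
                        static_cast<long long>(live));
        }
    }
    return 0;
}

Because sigma is large compared with the per-step change, the probability stays close to 0.5 and the live count follows the target smoothly rather than snapping to it, which is the "slowly increases the memory footprint, and later decreases it" behaviour described in the class comment.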