7070 * - Additional benchmarking scenarios can be created by extending `benchmark_interface`.
7171 */
7272
73+ #include < list>
7374#include < malloc.h>
7475#include < random>
7576
@@ -86,6 +87,7 @@ struct alloc_data {
8687};
8788
// A single pre-generated heap operation, replayed by the benchmark loop.
// Sequences of these are built up-front in prepareWorkload() so that no
// random-number generation happens on the timed path.
struct next_alloc_data {
    bool alloc;    // true if allocation, false if deallocation
    size_t offset; // index of the per-thread allocation slot to act on
    size_t size;   // bytes to allocate; 0 for a deallocation entry
};
@@ -288,18 +290,17 @@ template <
288290 typename =
289291 std::enable_if_t <std::is_base_of<allocator_interface, Alloc>::value>>
290292class multiple_malloc_free_benchmark : public benchmark_interface <Size, Alloc> {
291- using distribution = std::uniform_int_distribution< size_t >;
293+ protected:
292294 template <class T > using vector2d = std::vector<std::vector<T>>;
293295 using base = benchmark_interface<Size, Alloc>;
294-
295296 int allocsPerIterations = 10 ;
296297 bool thread_local_allocations = true ;
297298 size_t max_allocs = 0 ;
298299
299300 vector2d<alloc_data> allocations;
300301 vector2d<next_alloc_data> next;
301302 using next_alloc_data_iterator =
302- std::vector<next_alloc_data>::const_iterator;
303+ typename std::vector<next_alloc_data>::const_iterator;
303304 std::vector<std::unique_ptr<next_alloc_data_iterator>> next_iter;
304305 int64_t iterations;
305306
@@ -386,15 +387,20 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
386387 auto tid = state.thread_index ();
387388 auto &allocation = allocations[tid];
388389 auto &iter = next_iter[tid];
390+
389391 for (int i = 0 ; i < allocsPerIterations; i++) {
390392 auto &n = *(*iter)++;
391393 auto &alloc = allocation[n.offset ];
392- base::allocator.benchFree (alloc.ptr , alloc.size );
393- alloc.size = n.size ;
394- alloc.ptr = base::allocator.benchAlloc (alloc.size );
395-
396- if (alloc.ptr == NULL ) {
397- state.SkipWithError (" allocation failed" );
394+ if (n.alloc ) {
395+ alloc.ptr = base::allocator.benchAlloc (n.size );
396+ if (alloc.ptr == NULL ) {
397+ state.SkipWithError (" allocation failed" );
398+ }
399+ alloc.size = n.size ;
400+ } else {
401+ base::allocator.benchFree (alloc.ptr , alloc.size );
402+ alloc.ptr = NULL ;
403+ alloc.size = 0 ;
398404 }
399405 }
400406 }
@@ -412,13 +418,14 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
412418 }
413419
414420 private:
415- void prealloc (benchmark::State &state) {
421+ virtual void prealloc (benchmark::State &state) {
416422 auto tid = state.thread_index ();
417423 auto &i = allocations[tid];
418424 i.resize (max_allocs);
419425 auto sizeGenerator = base::alloc_sizes[tid];
420426
421- for (size_t j = 0 ; j < max_allocs; j++) {
427+ // Preallocate half of the available slots, for allocations
428+ for (size_t j = 0 ; j < max_allocs / 2 ; j++) {
422429 auto size = sizeGenerator.nextSize ();
423430 i[j].ptr = base::allocator.benchAlloc (size);
424431 if (i[j].ptr == NULL ) {
@@ -441,20 +448,168 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
441448 }
442449 }
443450
    // Pre-generates this thread's sequence of alloc/free operations that the
    // benchmark loop later replays. Half of the slots start out allocated
    // (matching prealloc()); each step then randomly either allocates a free
    // slot or frees an allocated one, keeping the live count near 50%.
    // The engine is seeded with a constant so the workload is deterministic
    // and identical across runs.
    virtual void prepareWorkload(benchmark::State &state) {
        auto tid = state.thread_index();
        auto &n = next[tid];

        // Create generators for random index selection and binary decision.
        using distribution = std::uniform_int_distribution<size_t>;
        std::default_random_engine generator;
        distribution dist_offset(0, max_allocs - 1);
        distribution dist_opt_type(0, 1);
        generator.seed(0);

        // NOTE(review): this copies the size generator — presumably intended
        // so the thread's stored generator state is left untouched; confirm.
        auto sizeGenerator = base::alloc_sizes[tid];
        std::vector<size_t> free;
        std::vector<size_t> allocated;
        free.reserve(max_allocs / 2);
        allocated.reserve(max_allocs / 2);
        // Preallocate memory: initially, half the indices are allocated.
        // See prealloc() function;
        size_t i = 0;
        while (i < max_allocs / 2) {
            allocated.push_back(i++);
        }
        // The remaining indices are marked as free.
        while (i < max_allocs) {
            free.push_back(i++);
        }

        n.clear();
        for (int64_t j = 0; j < state.max_iterations * allocsPerIterations;
             j++) {
            // Decide whether to allocate or free:
            // - If no allocations exist, allocation is forced.
            // - If there is maximum number of allocation, free is forced
            // - Otherwise, use a binary random choice (0 or 1)
            if (allocated.empty() ||
                (dist_opt_type(generator) == 0 && !free.empty())) {
                // Allocation: pick a random free slot. Swap-with-back makes
                // the removal O(1); the modulo re-maps dist_offset's
                // [0, max_allocs) draw into the current vector size.
                std::swap(free[dist_offset(generator) % free.size()],
                          free.back());
                auto offset = free.back();
                free.pop_back();

                n.push_back({true, offset, sizeGenerator.nextSize()});
                allocated.push_back(offset);
            } else {
                // Free: pick a random allocated slot the same way.
                std::swap(allocated[dist_offset(generator) % allocated.size()],
                          allocated.back());
                auto offset = allocated.back();
                allocated.pop_back();

                n.push_back({false, offset, 0});
                free.push_back(offset);
            }
        }

        next_iter[tid] = std::make_unique<next_alloc_data_iterator>(n.cbegin());
    }
460509};
510+ // This class benchmarks performance by randomly allocating and freeing memory.
511+ // Initially, it slowly increases the memory footprint, and later decreases it.
512+ template <
513+ typename Size, typename Alloc,
514+ typename =
515+ std::enable_if_t <std::is_base_of<alloc_size_interface, Size>::value>,
516+ typename =
517+ std::enable_if_t <std::is_base_of<allocator_interface, Alloc>::value>>
518+ class peak_alloc_benchmark
519+ : public multiple_malloc_free_benchmark<Size, Alloc> {
520+ using base = multiple_malloc_free_benchmark<Size, Alloc>;
521+ virtual void prepareWorkload (benchmark::State &state) override {
522+ // Retrieve the thread index and corresponding operation buffer.
523+ auto tid = state.thread_index ();
524+ auto &n = this ->next [tid];
525+
526+ // Set up the random generators for index selection and decision making.
527+ std::default_random_engine generator;
528+ std::uniform_int_distribution<size_t > dist_offset (0 ,
529+ this ->max_allocs - 1 );
530+ std::uniform_real_distribution<double > dist_opt_type (0 , 1 );
531+ generator.seed (0 );
532+ auto sizeGenerator = this ->alloc_sizes [tid];
533+
534+ n.clear ();
535+ std::vector<size_t > free;
536+ std::vector<size_t > allocated;
537+ free.reserve (this ->max_allocs );
538+ // Initially, all indices are available.
539+ for (size_t i = 0 ; i < this ->max_allocs ; i++) {
540+ free.push_back (i);
541+ }
542+
543+ // Total number of allocation/free operations to simulate.
544+ int64_t operations_number =
545+ state.max_iterations * this ->allocsPerIterations ;
546+ for (int64_t j = 0 ; j < operations_number; j++) {
547+ int64_t target_allocation;
548+
549+ // Determine the target number of allocations based on the progress of the iterations.
550+ // In the first half of the iterations, the target allocation increases linearly.
551+ // In the second half, it decreases linearly.
552+ if (j < operations_number / 2 ) {
553+ target_allocation = 2 * static_cast <int64_t >(this ->max_allocs ) *
554+ j / operations_number;
555+ } else {
556+ target_allocation = -2 *
557+ static_cast <int64_t >(this ->max_allocs ) *
558+ j / operations_number +
559+ 2 * static_cast <int64_t >(this ->max_allocs );
560+ }
561+
562+ // x represents the gap between the target and current allocations.
563+ auto x = static_cast <double >(target_allocation -
564+ static_cast <double >(allocated.size ()));
565+
566+ // Use a normal CDF with high sigma so that when x is positive,
567+ // we are slightly more likely to allocate,
568+ // and when x is negative, slightly more likely to free memory,
569+ // keeping the overall change gradual.
570+
571+ const double sigma = 1000 ;
572+ auto cdf = normalCDF (x, sigma);
573+
574+ // Decide whether to allocate or free:
575+ // - If no allocations exist, allocation is forced.
576+ // - If there is maximum number of allocation, free is forced
577+ // - Otherwise, Based on the computed probability, choose whether to allocate or free
578+ if (allocated.empty () ||
579+ (!free.empty () && cdf > dist_opt_type (generator))) {
580+ // Allocation
581+ std::swap (free[dist_offset (generator) % free.size ()],
582+ free.back ());
583+ auto offset = free.back ();
584+ free.pop_back ();
585+ n.push_back ({true , offset, sizeGenerator.nextSize ()});
586+ allocated.push_back (offset);
587+ } else {
588+ // Free
589+ std::swap (allocated[dist_offset (generator) % allocated.size ()],
590+ allocated.back ());
591+ auto offset = allocated.back ();
592+ allocated.pop_back ();
593+ n.push_back ({false , offset, 0 });
594+ free.push_back (offset);
595+ }
596+ }
597+
598+ this ->next_iter [tid] =
599+ std::make_unique<std::vector<next_alloc_data>::const_iterator>(
600+ n.cbegin ());
601+ }
602+
603+ virtual void prealloc (benchmark::State &state) {
604+ auto tid = state.thread_index ();
605+ auto &i = base::allocations[tid];
606+ i.resize (base::max_allocs);
607+ }
608+ virtual std::string name () { return base::base::name () + " /peak_alloc" ; }
609+
610+ private:
611+ // Function to calculate the CDF of a normal distribution
612+ double normalCDF (double x, double sigma = 1.0 , double mu = 0.0 ) {
613+ return 0.5 * (1 + std::erf ((x - mu) / (sigma * std::sqrt (2.0 ))));
614+ }
615+ };
0 commit comments