diff --git a/SConstruct b/SConstruct
index 63f829862..ccc313e10 100644
--- a/SConstruct
+++ b/SConstruct
@@ -287,6 +287,12 @@ AddOption('--override-targets',
                 " pass a comma-separated list of target names,"
                 " e.g. 'pc,posix,posix_ext,gnu,libuv,openfec,...'"))
 
+AddOption('--bench-profile',
+          dest='bench_profile',
+          action='store',
+          type='string',
+          help=('set benchmark profile: small, medium, or large'))
+
 # configure even in dry run mode
 SCons.SConf.dryrun = 0
 
@@ -914,6 +920,23 @@ if meta.platform in ['windows']:
         ('_WIN32_WINNT', '0x0601'),
     ])
 
+# benchmark profile (--bench-profile)
+# controls the size of the benchmark data set and the number of threads
+# it can be set to 'small', 'medium', or 'large' (case-insensitive)
+# if not set, no define is passed and bench_profile.h picks a default by CPU family
+bench_profile = GetOption('bench_profile')
+if bench_profile:
+    profile = bench_profile.lower()
+    if profile == 'small':
+        env.AppendUnique(CPPDEFINES=['ROC_BENCHMARK_PROFILE_SMALL'])
+    elif profile == 'medium':
+        env.AppendUnique(CPPDEFINES=['ROC_BENCHMARK_PROFILE_MEDIUM'])
+    elif profile == 'large':
+        env.AppendUnique(CPPDEFINES=['ROC_BENCHMARK_PROFILE_LARGE'])
+    else:
+        env.Die("unknown --bench-profile '{}', expected one of: small, medium, large",
+                bench_profile)
+
 # env will hold settings common to all code
 # subenvs will hold settings specific to particular parts of code
 subenv_names = 'internal_modules public_libs examples tools tests generated_code'.split()
diff --git a/src/tests/bench_main.cpp b/src/tests/bench_main.cpp
index 8ea25f0f7..812b0adb2 100644
--- a/src/tests/bench_main.cpp
+++ b/src/tests/bench_main.cpp
@@ -6,6 +6,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
  */
 
+#include "bench_profile.h"
 #include "roc_core/crash_handler.h"
 #include "roc_core/heap_arena.h"
 #include "roc_core/log.h"
diff --git a/src/tests/bench_profile.h b/src/tests/bench_profile.h
new file mode 100644
index 000000000..99da321a0
--- /dev/null
+++ b/src/tests/bench_profile.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2025 Roc Streaming authors
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ROC_BENCH_PROFILE_H_
+#define ROC_BENCH_PROFILE_H_
+
+#include "roc_core/cpu_traits.h"
+
+// Benchmark profile selection.
+//
+// The profile may be forced from the build system (scons --bench-profile=...),
+// which defines one of ROC_BENCHMARK_PROFILE_SMALL / _MEDIUM / _LARGE.
+// If none of them is defined, a default is chosen below from ROC_CPU_FAMILY.
+//
+// NOTE: the architecture-based default is visible only in translation units
+// that include this header, so every benchmark that tests
+// ROC_BENCHMARK_PROFILE_* with #ifdef has to include it.
+
+#if !defined(ROC_BENCHMARK_PROFILE_LARGE) && !defined(ROC_BENCHMARK_PROFILE_MEDIUM)      \
+    && !defined(ROC_BENCHMARK_PROFILE_SMALL)
+
+#ifndef ROC_CPU_FAMILY
+#error \
+    "ROC_CPU_FAMILY is not defined. Please define it to use ROC_BENCHMARK_PROFILE_* macros"
+#endif
+
+// LARGE profile: High-performance workstation-class architectures
+#if ROC_CPU_FAMILY == ROC_CPU_FAMILY_X86_64 || ROC_CPU_FAMILY == ROC_CPU_FAMILY_PPC64    \
+    || ROC_CPU_FAMILY == ROC_CPU_FAMILY_S390X
+#define ROC_BENCHMARK_PROFILE_LARGE
+// MEDIUM profile: Powerful SBCs and capable architectures
+#elif ROC_CPU_FAMILY == ROC_CPU_FAMILY_GENERIC || ROC_CPU_FAMILY == ROC_CPU_FAMILY_X86   \
+    || ROC_CPU_FAMILY == ROC_CPU_FAMILY_PPC || ROC_CPU_FAMILY == ROC_CPU_FAMILY_S390     \
+    || ROC_CPU_FAMILY == ROC_CPU_FAMILY_LOONGARCH64                                      \
+    || ROC_CPU_FAMILY == ROC_CPU_FAMILY_AARCH64                                          \
+    || ROC_CPU_FAMILY == ROC_CPU_FAMILY_MIPS64                                           \
+    || ROC_CPU_FAMILY == ROC_CPU_FAMILY_RISCV64
+#define ROC_BENCHMARK_PROFILE_MEDIUM
+#else
+// SMALL profile: Weak CPUs, embedded systems, and specialized processors
+#define ROC_BENCHMARK_PROFILE_SMALL
+#endif
+
+#endif // !defined(ROC_BENCHMARK_PROFILE_...)
+
+#endif // ROC_BENCH_PROFILE_H_
diff --git a/src/tests/roc_core/bench_mpsc_queue.cpp b/src/tests/roc_core/bench_mpsc_queue.cpp
index 0e8859f5b..7e8b6268e 100644
--- a/src/tests/roc_core/bench_mpsc_queue.cpp
+++ b/src/tests/roc_core/bench_mpsc_queue.cpp
@@ -16,7 +16,16 @@ namespace roc {
 namespace core {
 namespace {
 
-enum { BatchSize = 10000, NumIterations = 5000000, NumThreads = 16 };
+enum {
+    BatchSize = 10000,
+#ifdef ROC_BENCHMARK_PROFILE_LARGE
+    NumIterations = 5000000,
+    NumThreads = 16
+#else
+    NumIterations = 500000,
+    NumThreads = 4
+#endif
+};
 
 #if defined(ROC_BENCHMARK_USE_ACCESSORS)
 inline int get_thread_index(const benchmark::State& state) {
@@ -150,8 +159,10 @@ BENCHMARK_REGISTER_F(BM_MpscQueue, TryPopFront)
     ->Arg(1)
     ->Arg(2)
     ->Arg(4)
+#ifdef ROC_BENCHMARK_PROFILE_LARGE
     ->Arg(8)
     ->Arg(16)
+#endif
     ->Iterations(NumIterations)
     ->Unit(benchmark::kMicrosecond);
 
@@ -184,8 +195,10 @@ BENCHMARK_REGISTER_F(BM_MpscQueue, PopFront)
     ->Arg(1)
     ->Arg(2)
     ->Arg(4)
+#ifdef ROC_BENCHMARK_PROFILE_LARGE
     ->Arg(8)
     ->Arg(16)
+#endif
     ->Iterations(NumIterations)
     ->Unit(benchmark::kMicrosecond);
 
diff --git a/src/tests/roc_ctl/bench_task_queue_contention.cpp b/src/tests/roc_ctl/bench_task_queue_contention.cpp
index f1f8452da..ce0e62b0e 100644
--- a/src/tests/roc_ctl/bench_task_queue_contention.cpp
+++ b/src/tests/roc_ctl/bench_task_queue_contention.cpp
@@ -17,9 +17,15 @@ namespace ctl {
 namespace {
 
 enum {
+#ifdef ROC_BENCHMARK_PROFILE_LARGE
     NumScheduleIterations = 2000000,
     NumScheduleAfterIterations = 20000,
     NumThreads = 8,
+#else
+    NumScheduleIterations = 200000,
+    NumScheduleAfterIterations = 5000,
+    NumThreads = 4,
+#endif
     BatchSize = 1000
 };
 
diff --git a/src/tests/roc_pipeline/bench_pipeline_loop_contention.cpp b/src/tests/roc_pipeline/bench_pipeline_loop_contention.cpp
index 57b7be0b2..1bce3c5b8 100644
--- a/src/tests/roc_pipeline/bench_pipeline_loop_contention.cpp
+++ b/src/tests/roc_pipeline/bench_pipeline_loop_contention.cpp
@@ -31,7 +31,11 @@ namespace {
 enum {
     SampleRate = 1000000, // 1 sample = 1 us (for convenience)
     Chans = 0x1,
+#ifdef ROC_BENCHMARK_PROFILE_LARGE
     NumThreads = 16,
+#else
+    NumThreads = 4,
+#endif
     NumIterations = 1000000,
     BatchSize = 10000,
     FrameBufSize = 100
diff --git a/src/tests/roc_pipeline/bench_pipeline_loop_peak_load.cpp b/src/tests/roc_pipeline/bench_pipeline_loop_peak_load.cpp
index 7e7d69291..81a9b2efd 100644
--- a/src/tests/roc_pipeline/bench_pipeline_loop_peak_load.cpp
+++ b/src/tests/roc_pipeline/bench_pipeline_loop_peak_load.cpp
@@ -495,6 +495,8 @@ void BM_PipelinePeakLoad_PreciseSchedOff(benchmark::State& state) {
     task_thr.stop();
     task_thr.join();
 
+    pipeline.stop_and_wait();
+
     stats.export_counters(state);
     pipeline.export_counters(state);
 }
@@ -525,6 +527,8 @@ void BM_PipelinePeakLoad_PreciseSchedOn(benchmark::State& state) {
     task_thr.stop();
     task_thr.join();
 
+    pipeline.stop_and_wait();
+
     stats.export_counters(state);
     pipeline.export_counters(state);
 }