diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12ed355..e174912 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,9 +85,8 @@ jobs: - name: Run the benchmarks uses: CodSpeedHQ/action@main if: matrix.codspeed-mode != 'off' - env: - CODSPEED_PERF_ENABLED: true with: + mode: ${{ matrix.codspeed-mode }} run: examples/google_benchmark_cmake/build/benchmark_example token: ${{ secrets.CODSPEED_TOKEN }} @@ -124,9 +123,8 @@ jobs: - name: Run the benchmarks uses: CodSpeedHQ/action@main if: matrix.codspeed-mode != 'off' - env: - CODSPEED_PERF_ENABLED: true with: + mode: ${{ matrix.codspeed-mode }} run: bazel run //examples/google_benchmark_bazel:my_benchmark --//core:codspeed_mode=${{ matrix.codspeed-mode }} token: ${{ secrets.CODSPEED_TOKEN }} diff --git a/core/BUILD b/core/BUILD index 75c1d8f..d1cdef2 100644 --- a/core/BUILD +++ b/core/BUILD @@ -29,6 +29,8 @@ cc_library( "-Wno-unused-parameter", "-Wno-unused-but-set-variable", "-Wno-type-limits", + "-Wno-format", + "-Wno-format-security", ], }), visibility = ["//visibility:public"], diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 6a2bea3..21b2639 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -35,6 +35,8 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") -Wno-unused-parameter -Wno-unused-but-set-variable -Wno-type-limits + -Wno-format + -Wno-format-security ) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") target_compile_options( diff --git a/core/include/measurement.hpp b/core/include/measurement.hpp index 89de7be..1644415 100644 --- a/core/include/measurement.hpp +++ b/core/include/measurement.hpp @@ -1,6 +1,7 @@ #ifndef MEASUREMENT_H #define MEASUREMENT_H +#include #include #ifdef _WIN32 #include @@ -49,4 +50,23 @@ ALWAYS_INLINE void measurement_set_executed_benchmark(const std::string& name) { instrument_hooks_executed_benchmark(g_hooks, current_pid, name.c_str()); } +ALWAYS_INLINE uint64_t measurement_current_timestamp() { /* Returns the value reported by instrument_hooks_current_timestamp() unchanged. */ + return 
instrument_hooks_current_timestamp(); +} + +ALWAYS_INLINE int8_t measurement_add_marker(uint8_t marker_type, + uint64_t timestamp) { /* Records one marker through instrument_hooks_add_marker, using the pid from getpid(), and returns the result of that call. */ + auto pid = getpid(); + return instrument_hooks_add_marker(g_hooks, pid, marker_type, timestamp); +} + +ALWAYS_INLINE void measurement_add_benchmark_timestamps(uint64_t start, + uint64_t end) { /* Emits a MARKER_TYPE_BENCHMARK_START marker at start and a MARKER_TYPE_BENCHMARK_END marker at end. The asserts (active only when assert() is enabled) require both timestamps to be nonzero and start not to exceed end; the results of the two measurement_add_marker calls are discarded. */ + assert(start <= end); + assert(start != 0 && end != 0); + + measurement_add_marker(MARKER_TYPE_BENCHMARK_START, start); + measurement_add_marker(MARKER_TYPE_BENCHMARK_END, end); +} + #endif // MEASUREMENT_H diff --git a/core/instrument-hooks b/core/instrument-hooks index a9ae7a4..b1e401a 160000 --- a/core/instrument-hooks +++ b/core/instrument-hooks @@ -1 +1 @@ -Subproject commit a9ae7a4d897dbb694cb0355c9994141a7c4a1ab9 +Subproject commit b1e401a4d031ad308edb22ed59a52253a1ebe924 diff --git a/examples/google_benchmark_bazel/helper.hpp b/examples/google_benchmark_bazel/helper.hpp new file mode 120000 index 0000000..bc2f02c --- /dev/null +++ b/examples/google_benchmark_bazel/helper.hpp @@ -0,0 +1 @@ +../google_benchmark_cmake/helper.hpp \ No newline at end of file diff --git a/examples/google_benchmark_bazel/multithread_bench.hpp b/examples/google_benchmark_bazel/multithread_bench.hpp new file mode 120000 index 0000000..5709a0d --- /dev/null +++ b/examples/google_benchmark_bazel/multithread_bench.hpp @@ -0,0 +1 @@ +../google_benchmark_cmake/multithread_bench.hpp \ No newline at end of file diff --git a/examples/google_benchmark_bazel/pause_timing_bench.hpp b/examples/google_benchmark_bazel/pause_timing_bench.hpp new file mode 120000 index 0000000..24e68c9 --- /dev/null +++ b/examples/google_benchmark_bazel/pause_timing_bench.hpp @@ -0,0 +1 @@ +../google_benchmark_cmake/pause_timing_bench.hpp \ No newline at end of file diff --git a/examples/google_benchmark_cmake/helper.hpp b/examples/google_benchmark_cmake/helper.hpp new file mode 100644 index 0000000..2be319c --- /dev/null +++ b/examples/google_benchmark_cmake/helper.hpp @@ -0,0 +1,26 @@ 
+#ifndef HELPER_HPP +#define HELPER_HPP + +#ifdef _MSC_VER +#define NOINLINE __declspec(noinline) +#else +#define NOINLINE __attribute__((noinline)) +#endif + +#include + +#include +#include +#include + +NOINLINE static uint64_t recursive_fib(int n) { /* Naive doubly recursive Fibonacci; any n not greater than 1 is returned as-is, converted to uint64_t. */ + if (n <= 1) return n; + return recursive_fib(n - 1) + recursive_fib(n - 2); +} + +NOINLINE static uint64_t expensive_operation() { + // Expensive recursive computation that will dominate flamegraph + return 42 + recursive_fib(30); +} + +#endif // HELPER_HPP diff --git a/examples/google_benchmark_cmake/main.cpp b/examples/google_benchmark_cmake/main.cpp index 9dd39dd..525cebd 100644 --- a/examples/google_benchmark_cmake/main.cpp +++ b/examples/google_benchmark_cmake/main.cpp @@ -4,6 +4,8 @@ #include "fibonacci_bench.hpp" #include "fixture_bench.hpp" +#include "multithread_bench.hpp" +#include "pause_timing_bench.hpp" #include "sleep_bench.hpp" #include "template_bench.hpp" diff --git a/examples/google_benchmark_cmake/multithread_bench.hpp b/examples/google_benchmark_cmake/multithread_bench.hpp new file mode 100644 index 0000000..a67b769 --- /dev/null +++ b/examples/google_benchmark_cmake/multithread_bench.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include + +#include +#include + +#include "helper.hpp" + +// Simple multithreaded benchmark: spawn thread, do work, join +static void BM_SimpleMultithread(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); /* the flags and the worker thread are created while timing is paused */ + std::atomic work_start{false}; + std::atomic work_done{false}; + + std::thread worker([&]() { + // Wait for the signal to start + while (!work_start.load()) { + std::this_thread::yield(); + } + + // Do some CPU work + benchmark::DoNotOptimize(expensive_operation()); + work_done.store(true); + }); + state.ResumeTiming(); /* the start signal, the wait loop and join() below run with timing resumed */ + + // Signal the worker to start + work_start.store(true); + + // Wait for worker to complete + while (!work_done.load()) { + std::this_thread::yield(); + } + + worker.join(); + } +} + +BENCHMARK(BM_SimpleMultithread); diff --git 
a/examples/google_benchmark_cmake/pause_timing_bench.hpp b/examples/google_benchmark_cmake/pause_timing_bench.hpp new file mode 100644 index 0000000..5687470 --- /dev/null +++ b/examples/google_benchmark_cmake/pause_timing_bench.hpp @@ -0,0 +1,40 @@ +#pragma once + +#include + +#include +#include + +#include "helper.hpp" + +NOINLINE static uint64_t actual_work() { /* Stand-in for the measured workload: calls sleep_for with 1 ms, then returns 42. */ + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + return 42; +} + +static void BM_large_setup_teardown(benchmark::State& state) { /* Each iteration runs expensive_operation() before and after actual_work(), both inside PauseTiming()/ResumeTiming() pairs, so actual_work() is the only call made while timing is running. */ + for (auto _ : state) { + state.PauseTiming(); + benchmark::DoNotOptimize(expensive_operation()); + state.ResumeTiming(); + + benchmark::DoNotOptimize(actual_work()); + + state.PauseTiming(); + benchmark::DoNotOptimize(expensive_operation()); + state.ResumeTiming(); + } +} +// IMPORTANT: Use fixed iterations, otherwise we'll run for 10+ minutes +BENCHMARK(BM_large_setup_teardown)->Iterations(100); + +static void BM_large_setup(benchmark::State& state) { /* Same shape as BM_large_setup_teardown, but with no paused section after actual_work(). */ + for (auto _ : state) { + state.PauseTiming(); + benchmark::DoNotOptimize(expensive_operation()); + state.ResumeTiming(); + + benchmark::DoNotOptimize(actual_work()); + } +} +BENCHMARK(BM_large_setup)->Iterations(100); diff --git a/google_benchmark/include/benchmark/benchmark.h b/google_benchmark/include/benchmark/benchmark.h index f30977b..0e7d96d 100644 --- a/google_benchmark/include/benchmark/benchmark.h +++ b/google_benchmark/include/benchmark/benchmark.h @@ -949,6 +949,9 @@ class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State { #if defined(CODSPEED_INSTRUMENTATION) || defined(CODSPEED_WALLTIME) codspeed::CodSpeed* codspeed_; #endif +#ifdef CODSPEED_WALLTIME + uint64_t resume_timestamp_; /* timestamp stored by ResumeTiming(); initialised to 0 and reset to 0 by PauseTiming() after the interval is reported */ +#endif private: bool started_; @@ -1044,11 +1047,13 @@ struct State::StateIterator { private: friend class State; BENCHMARK_ALWAYS_INLINE - StateIterator() : cached_(0), parent_() {} + StateIterator() : cached_(0), parent_() + {} BENCHMARK_ALWAYS_INLINE explicit StateIterator(State* st) - : cached_(st->skipped() 
? 0 : st->max_iterations), parent_(st) {} + : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) + {} public: BENCHMARK_ALWAYS_INLINE @@ -1063,7 +1068,9 @@ struct State::StateIterator { BENCHMARK_ALWAYS_INLINE bool operator!=(StateIterator const&) const { - if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true; + if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) { + return true; + } #ifdef CODSPEED_INSTRUMENTATION measurement_stop(); #endif diff --git a/google_benchmark/src/benchmark.cc b/google_benchmark/src/benchmark.cc index 91c737a..5a7dbb7 100644 --- a/google_benchmark/src/benchmark.cc +++ b/google_benchmark/src/benchmark.cc @@ -19,6 +19,10 @@ #include "codspeed.h" #include "internal_macros.h" +#ifdef CODSPEED_WALLTIME +#include "measurement.hpp" +#endif + #ifndef BENCHMARK_OS_WINDOWS #if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include @@ -185,6 +189,9 @@ State::State(std::string name, IterationCount max_iters, max_iterations(max_iters), #if defined(CODSPEED_INSTRUMENTATION) || defined(CODSPEED_WALLTIME) codspeed_(codspeed), +#endif +#ifdef CODSPEED_WALLTIME + resume_timestamp_(0), #endif started_(false), finished_(false), @@ -252,9 +259,21 @@ State::State(std::string name, IterationCount max_iters, } void State::PauseTiming() { +#ifdef CODSPEED_WALLTIME + uint64_t pause_timestamp = measurement_current_timestamp(); /* sampled first, before the BM_CHECK and StopTimer() below */ +#endif + // Add in time accumulated so far BM_CHECK(started_ && !finished_ && !skipped()); timer_->StopTimer(); + +#ifdef CODSPEED_WALLTIME + if (resume_timestamp_ != 0) { /* 0 means no timestamp from ResumeTiming() is pending (as far as this diff shows); otherwise report the resume-to-pause interval and clear it */ + measurement_add_benchmark_timestamps(resume_timestamp_, pause_timestamp); + resume_timestamp_ = 0; + } +#endif + if (perf_counters_measurement_ != nullptr) { std::vector> measurements; if (!perf_counters_measurement_->Stop(measurements)) { @@ -276,6 +295,11 @@ void State::ResumeTiming() { if (perf_counters_measurement_ != nullptr) { perf_counters_measurement_->Start(); } + +#ifdef CODSPEED_WALLTIME + BM_CHECK(resume_timestamp_ == 0); + 
resume_timestamp_ = measurement_current_timestamp(); /* start of the interval that PauseTiming() reports and then clears */ +#endif } void State::SkipWithMessage(const std::string& msg) {