Skip to content

Commit db35262

Browse files
committed
feat: prepared benchmark for batch-reduce gemm
1 parent 09ebaf5 commit db35262

File tree

3 files changed

+96
-0
lines changed

3 files changed

+96
-0
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ set(TEST_ARM_INSTRUCTION_FILES
151151
set(BENCH_FILES
152152
matmul_16_6_1.bench.cpp
153153
matmul_16_6_k.bench.cpp
154+
matmul.bench.cpp
154155
)
155156

156157
foreach(file ${SRC_MAIN_FILES})

src/test/kernels/matmul.bench.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#include "matmul.bench.h"
2+
#include "../../main/Brgemm.h"
3+
#include <benchmark/benchmark.h>
4+
5+
class GemmFixture : public benchmark::Fixture
6+
{
7+
public:
8+
std::vector<float> matrix_a, matrix_b, matrix_c;
9+
double flops;
10+
11+
void SetUp(::benchmark::State &state) override
12+
{
13+
flops = 0;
14+
15+
int M = state.range(0);
16+
int N = state.range(1);
17+
int K = state.range(2);
18+
19+
matrix_a.resize(M * K);
20+
matrix_b.resize(K * N);
21+
matrix_c.resize(M * N);
22+
23+
fill_random_matrix_args(matrix_a.data(), M * K);
24+
fill_random_matrix_args(matrix_b.data(), K * N);
25+
fill_random_matrix_args(matrix_c.data(), M * N);
26+
}
27+
28+
void TearDown(::benchmark::State &state) override
29+
{
30+
state.counters["FLOPS"] = benchmark::Counter(flops, benchmark::Counter::kIsRate);
31+
}
32+
};
33+
34+
/**
 * @brief Benchmark a JIT-generated GEMM kernel for the (M, N, K) taken from the benchmark arguments.
 *
 * The kernel is generated once outside the timed loop, then invoked once per benchmark iteration
 * on the fixture's matrices. After the loop the total floating point operation count
 * (2 * M * N * K per kernel call) is stored in `flops` for the fixture to report.
 *
 * @param state Benchmark state providing M, N and K as ranges 0, 1 and 2.
 */
BENCHMARK_DEFINE_F(GemmFixture, BM_matmul)(benchmark::State &state)
{
    int M = state.range(0);
    int N = state.range(1);
    int K = state.range(2);

    // NOTE(review): the arguments after K are presumably batch size 1 and no transposition of
    // A, B or C — confirm against Brgemm.h.
    mini_jit::Brgemm brgemm;
    brgemm.generate(M, N, K, 1, 0, 0, 0, mini_jit::Brgemm::dtype_t::fp32);
    auto kernel = brgemm.get_kernel();

    for (auto _ : state)
    {
        // NOTE(review): assumes the kernel takes (a, b, c, lda, ldb, ldc, br_stride_a, br_stride_b)
        // with column-major operands — confirm against Brgemm.h. Under that layout B is K x N, so
        // its leading dimension must be K; passing 1 is only correct when K == 1.
        kernel(matrix_a.data(), matrix_b.data(), matrix_c.data(), M, K, M, 1, 1);
    }

    // Accumulate in double so the product cannot overflow int for larger problem sizes.
    flops = 2.0 * M * N * K * static_cast<double>(state.iterations());
}
51+
52+
/**
 * @brief Add every (M, N, K) argument triple the benchmark is run with.
 *
 * M and N each take the values 16, 32, 48 and 64; K takes 1, 16, 32, 64 and 128. Triples are
 * added with M varying slowest and K varying fastest.
 *
 * @param b The benchmark the argument triples are added to.
 */
static void CustomArguments(benchmark::internal::Benchmark *b)
{
    const int k_values[] = {1, 16, 32, 64, 128};

    for (int m_block = 1; m_block <= 4; ++m_block)
    {
        const int M = 16 * m_block;
        for (int n_block = 1; n_block <= 4; ++n_block)
        {
            const int N = 16 * n_block;
            for (const int K : k_values)
            {
                b->Args({M, N, K});
            }
        }
    }
}
59+
60+
// ########## UNCOMMENT WHEN brgemm.generate() supports m, n < 16 ##########
61+
// static void CustomArguments(benchmark::internal::Benchmark *b)
62+
// {
63+
// for (int M = 1; M <= 64; M += 1)
64+
// for (int N = 1; N <= 64; N += 1)
65+
// for (int K : {1, 16, 32, 64, 128})
66+
// b->Args({M, N, K});
67+
// }
68+
69+
// Register BM_matmul with named arguments for every triple produced by CustomArguments.
BENCHMARK_REGISTER_F(GemmFixture, BM_matmul)
    ->ArgNames({"M", "N", "K"})
    ->Apply(CustomArguments)
    ->MinWarmUpTime(1.0) // WarmUp in seconds
    ->ReportAggregatesOnly(true);

src/test/kernels/matmul.bench.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <cstdint>
12
#include <cstdlib>
23
#include <ctime>
34

@@ -14,4 +15,25 @@ template <uint32_t TSize> void fill_random_matrix(float (&matrix)[TSize])
1415
{
1516
matrix[i] = (static_cast<float>(std::rand())) / (static_cast<float>(std::rand()));
1617
}
18+
}
19+
20+
/**
 * @brief Fill the given matrix with random, finite, non-negative values.
 *
 * Each element is the ratio of two std::rand() draws. The divisor is offset by one so that a
 * draw of zero can never cause a division by zero, which would store inf or NaN in the matrix.
 * The random number generator is seeded from the current time on the first call only.
 *
 * @param matrix The matrix to write to.
 * @param matrix_size The number of elements in the matrix.
 */
inline void fill_random_matrix_args(float *matrix, size_t matrix_size)
{
    static bool initialized = false;
    if (!initialized)
    {
        std::srand(static_cast<unsigned int>(std::time(nullptr))); // Seed RNG once
        initialized = true;
    }

    for (size_t i = 0; i < matrix_size; i++)
    {
        // Separate statements fix the order in which the two draws are taken.
        const float numerator = static_cast<float>(std::rand());
        const float denominator = static_cast<float>(std::rand()) + 1.0f; // never zero
        matrix[i] = numerator / denominator;
    }
}

0 commit comments

Comments
 (0)