1+ #include < climits>
2+ #include < random>
3+
14#include " benchmark/BenchmarkUtils.h"
25#include " benchmark/benchmark.h"
36#include " gtest/gtest.h"
@@ -9,9 +12,22 @@ namespace {
912
1013using ::zkir::benchmark::Memref;
1114
12- struct i256 {
13- uint64_t limbs[4 ]; // 4 x 64 = 256 bits
14- };
15+ using i256 = benchmark::BigInt<4 >;
16+
17+ // `kPrime` =
18+ // 21888242871839275222246405745257275088548364400416034343698204186575808495617
19+ const i256 kPrime = i256::fromHexString(
20+ " 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001" );
21+
22+ // Fill the input with random numbers in [0, prime).
23+ static void fillWithRandom (Memref<i256> *input, const i256 &kPrime ) {
24+ // Set up the random number generator.
25+ std::mt19937_64 rng (std::random_device{}()); // NOLINT(whitespace/braces)
26+ std::uniform_int_distribution<uint64_t > dist (0 , UINT64_MAX);
27+ for (int i = 0 ; i < NUM_COEFFS; i++) {
28+ *input->pget (0 , i) = i256::randomLT (kPrime , rng, dist);
29+ }
30+ }
1531
1632extern " C" void _mlir_ciface_input_generation (Memref<i256> *output);
1733extern " C" void _mlir_ciface_ntt (Memref<i256> *output, Memref<i256> *input);
@@ -25,6 +41,7 @@ extern "C" void _mlir_ciface_intt_mont(Memref<i256> *output,
2541void BM_ntt_benchmark (::benchmark::State &state) {
2642 Memref<i256> input (1 , NUM_COEFFS);
2743 _mlir_ciface_input_generation (&input);
44+ fillWithRandom (&input, kPrime );
2845
2946 Memref<i256> ntt (1 , NUM_COEFFS);
3047 for (auto _ : state) {
@@ -46,6 +63,7 @@ BENCHMARK(BM_ntt_benchmark)->Unit(::benchmark::kSecond);
4663void BM_intt_benchmark (::benchmark::State &state) {
4764 Memref<i256> input (1 , NUM_COEFFS);
4865 _mlir_ciface_input_generation (&input);
66+ fillWithRandom (&input, kPrime );
4967
5068 Memref<i256> ntt (1 , NUM_COEFFS);
5169 _mlir_ciface_ntt (&ntt, &input);
@@ -69,6 +87,7 @@ BENCHMARK(BM_intt_benchmark)->Iterations(1)->Unit(::benchmark::kSecond);
6987void BM_ntt_mont_benchmark (::benchmark::State &state) {
7088 Memref<i256> input (1 , NUM_COEFFS);
7189 _mlir_ciface_input_generation (&input);
90+ fillWithRandom (&input, kPrime );
7291
7392 Memref<i256> ntt (1 , NUM_COEFFS);
7493 for (auto _ : state) {
@@ -90,6 +109,7 @@ BENCHMARK(BM_ntt_mont_benchmark)->Unit(::benchmark::kSecond);
90109void BM_intt_mont_benchmark (::benchmark::State &state) {
91110 Memref<i256> input (1 , NUM_COEFFS);
92111 _mlir_ciface_input_generation (&input);
112+ fillWithRandom (&input, kPrime );
93113
94114 Memref<i256> ntt (1 , NUM_COEFFS);
95115 _mlir_ciface_ntt_mont (&ntt, &input);
@@ -120,12 +140,12 @@ BENCHMARK(BM_intt_mont_benchmark)->Iterations(1)->Unit(::benchmark::kSecond);
120140// L1 Data 64 KiB
121141// L1 Instruction 128 KiB
122142// L2 Unified 4096 KiB (x14)
123- // Load Average: 27.66, 13.59, 9.67
143+ // Load Average: 7.37, 7.53, 7.10
124144// ------------------------------------------------------------------------------
125145// Benchmark Time CPU Iterations
126146// ------------------------------------------------------------------------------
127- // BM_ntt_benchmark 0.190 s 0.183 s 4
128- // BM_intt_benchmark/iterations:1 0.381 s 0.368 s 1
129- // BM_ntt_mont_benchmark 0.221 s 0.214 s 3
130- // BM_intt_mont_benchmark/iterations:1 0.415 s 0.396 s 1
147+ // BM_ntt_benchmark 10.4 s 10.3 s 1
148+ // BM_intt_benchmark/iterations:1 12.1 s 11.4 s 1
149+ // BM_ntt_mont_benchmark 0.201 s 0.197 s 3
150+ // BM_intt_mont_benchmark/iterations:1 1.38 s 1.35 s 1
131151// NOLINTEND()
0 commit comments