@@ -17,6 +17,11 @@ extern "C" void _mlir_ciface_input_generation(Memref<i256> *output);
1717extern " C" void _mlir_ciface_ntt (Memref<i256> *output, Memref<i256> *input);
1818extern " C" void _mlir_ciface_intt (Memref<i256> *output, Memref<i256> *input);
1919
20+ extern " C" void _mlir_ciface_ntt_mont (Memref<i256> *output,
21+ Memref<i256> *input);
22+ extern " C" void _mlir_ciface_intt_mont (Memref<i256> *output,
23+ Memref<i256> *input);
24+
2025void BM_ntt_benchmark (::benchmark::State &state) {
2126 Memref<i256> input (1 , DEGREE);
2227 _mlir_ciface_input_generation (&input);
@@ -61,17 +66,66 @@ void BM_intt_benchmark(::benchmark::State &state) {
6166// modifying the input. But I am not sure why ;(
6267BENCHMARK (BM_intt_benchmark)->Iterations (1 )->Unit(::benchmark::kSecond );
6368
69+ void BM_ntt_mont_benchmark (::benchmark::State &state) {
70+ Memref<i256> input (1 , DEGREE);
71+ _mlir_ciface_input_generation (&input);
72+
73+ Memref<i256> ntt (1 , DEGREE);
74+ for (auto _ : state) {
75+ _mlir_ciface_ntt_mont (&ntt, &input);
76+ }
77+
78+ Memref<i256> intt (1 , DEGREE);
79+ _mlir_ciface_intt_mont (&intt, &ntt);
80+
81+ for (int i = 0 ; i < DEGREE; i++) {
82+ for (int j = 0 ; j < 4 ; j++) {
83+ EXPECT_EQ (intt.pget (0 , i)->limbs [j], input.pget (0 , i)->limbs [j]);
84+ }
85+ }
86+ }
87+
88+ BENCHMARK (BM_ntt_mont_benchmark)->Unit (::benchmark::kSecond );
89+
90+ void BM_intt_mont_benchmark (::benchmark::State &state) {
91+ Memref<i256> input (1 , DEGREE);
92+ _mlir_ciface_input_generation (&input);
93+
94+ Memref<i256> ntt (1 , DEGREE);
95+ _mlir_ciface_ntt_mont (&ntt, &input);
96+
97+ Memref<i256> intt (1 , DEGREE);
98+ for (auto _ : state) {
99+ _mlir_ciface_intt_mont (&intt, &ntt);
100+ }
101+
102+ for (int i = 0 ; i < DEGREE; i++) {
103+ for (int j = 0 ; j < 4 ; j++) {
104+ EXPECT_EQ (intt.pget (0 , i)->limbs [j], input.pget (0 , i)->limbs [j]);
105+ }
106+ }
107+ }
108+
109+ // FIXME(batzor): It fails for more than 1 iteration so it seems like it is
110+ // modifying the input. But I am not sure why ;(
111+ BENCHMARK (BM_intt_mont_benchmark)->Iterations (1 )->Unit(::benchmark::kSecond );
112+
64113} // namespace
65114} // namespace zkir
66115
116+ // clang-format off
117+ // NOLINTBEGIN(whitespace/line_length)
67118// Run on (14 X 24 MHz CPU s)
68119// CPU Caches:
69- // L1 Data 64 KiB
70- // L1 Instruction 128 KiB
71- // L2 Unified 4096 KiB (x14)
72- // Load Average: 22.54, 38.87, 26.62
73- // -------------------------------------------------------------------------
74- // Benchmark Time CPU Iterations
75- // -------------------------------------------------------------------------
76- // BM_ntt_benchmark 0.321 s 0.320 s 2
77- // BM_intt_benchmark/iterations:1 0.475 s 0.473 s 1
120+ // L1 Data 64 KiB
121+ // L1 Instruction 128 KiB
122+ // L2 Unified 4096 KiB (x14)
123+ // Load Average: 27.66, 13.59, 9.67
124+ // ------------------------------------------------------------------------------
125+ // Benchmark Time CPU Iterations
126+ // ------------------------------------------------------------------------------
127+ // BM_ntt_benchmark 0.190 s 0.183 s 4
128+ // BM_intt_benchmark/iterations:1 0.381 s 0.368 s 1
129+ // BM_ntt_mont_benchmark 0.221 s 0.214 s 3
130+ // BM_intt_mont_benchmark/iterations:1 0.415 s 0.396 s 1
131+ // NOLINTEND()
0 commit comments