fractalyze
diff --git a/‎benchmark/benchmark.bzl‎
Lines changed: 2 additions & 0 deletions b/‎benchmark/benchmark.bzl‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎benchmark/field/BUILD.bazel‎
Lines changed: 12 additions & 0 deletions b/‎benchmark/field/BUILD.bazel‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎benchmark/field/mul_benchmark.mlir‎
Lines changed: 19 additions & 0 deletions b/‎benchmark/field/mul_benchmark.mlir‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎benchmark/field/mul_benchmark_test.cc‎
Lines changed: 63 additions & 0 deletions b/‎benchmark/field/mul_benchmark_test.cc‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎benchmark/ntt/ntt_benchmark.mlir‎
Lines changed: 16 additions & 0 deletions b/‎benchmark/ntt/ntt_benchmark.mlir‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎benchmark/ntt/ntt_benchmark_test.cc‎
Lines changed: 63 additions & 9 deletions b/‎benchmark/ntt/ntt_benchmark_test.cc‎
Lines changed: 63 additions & 9 deletions
diff --git a/‎tests/Dialect/Field/prime_field_to_mod_arith.mlir‎
Lines changed: 64 additions & 0 deletions b/‎tests/Dialect/Field/prime_field_to_mod_arith.mlir‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎tests/Dialect/ModArith/mod_arith_runner.mlir‎
Lines changed: 52 additions & 1 deletion b/‎tests/Dialect/ModArith/mod_arith_runner.mlir‎
Lines changed: 52 additions & 1 deletion
@@ -124,13 +124,15 @@ def zkir_benchmark_test(name, mlir_src, test_src, zkir_opt_flags = [], data = []
         name = llvmir_target,
         src = generated_zkir_opt_name,
         pass_flags = ["--mlir-to-llvmir"],
+        tags = tags,
         generated_filename = generated_llvmir_name,
     )
 
     llc(
         name = obj_name,
         src = generated_llvmir_name,
         pass_flags = ["-relocation-model=pic", "-filetype=obj"],
+        tags = tags,
         generated_filename = generated_obj_name,
     )
     cc_import(
 
@@ -0,0 +1,12 @@
+load("//benchmark:benchmark.bzl", "zkir_benchmark_test")
+
+# Benchmark target: mul_benchmark.mlir is the MLIR source and
+# mul_benchmark_test.cc is the C++ test source.
+zkir_benchmark_test(
+    name = "mul_benchmark_test",
+    mlir_src = "mul_benchmark.mlir",
+    tags = ["manual"],
+    test_src = ["mul_benchmark_test.cc"],
+    zkir_opt_flags = ["-poly-to-llvm"],
+    deps = ["//benchmark:BenchmarkUtils"],
+)
@@ -0,0 +1,19 @@
+!mod = !mod_arith.int<21888242871839275222246405745257275088548364400416034343698204186575808495617 : i256>
+!F = !field.pf<21888242871839275222246405745257275088548364400416034343698204186575808495617 : i256>
+#mont = #mod_arith.montgomery<!mod>
+
+func.func @mul(%arg0 : i256) -> i256 attributes { llvm.emit_c_interface } {
+  %0 = field.pf.encapsulate %arg0 : i256 -> !F  // wrap the raw i256 as a !F value
+  %1 = field.pf.mul %0, %0 : !F  // %0 * %0
+  %2 = field.pf.mul %0, %1 : !F  // %0 * %0 * %0, i.e. the cube of the input
+  %3 = field.pf.extract %2 : !F -> i256  // unwrap to a raw i256 for the caller
+  return %3 : i256
+}
+
+func.func @mont_mul(%arg0 : i256) -> i256 attributes { llvm.emit_c_interface } {
+  %0 = field.pf.encapsulate %arg0 : i256 -> !F  // wrap the raw i256; no to_mont op is applied in this function
+  %1 = field.pf.mont_mul %0, %0 {montgomery = #mont} : !F  // mont_mul of %0 with itself
+  %2 = field.pf.mont_mul %0, %1 {montgomery = #mont} : !F  // mont_mul of %0 with the previous result
+  %3 = field.pf.extract %2 : !F -> i256  // unwrap to a raw i256; no from_mont op is applied either
+  return %3 : i256
+}
@@ -0,0 +1,63 @@
+#include "benchmark/BenchmarkUtils.h"
+#include "benchmark/benchmark.h"
+#include "gtest/gtest.h"
+
+namespace zkir {
+namespace {
+
+using ::zkir::benchmark::Memref;
+
+struct i256 {
+  uint64_t limbs[4];  // four 64-bit limbs: 4 x 64 = 256 bits
+};
+
+extern "C" void _mlir_ciface_mul(Memref<i256> *output, Memref<i256> *input);
+extern "C" void _mlir_ciface_mont_mul(Memref<i256> *output,
+                                      Memref<i256> *input);
+
+// Times _mlir_ciface_mul on a Memref<i256>(1, 1) input.
+void BM_mul_benchmark(::benchmark::State &state) {
+  Memref<i256> input(1, 1);
+  // Give all four limbs of element (0, 0) the same constant.
+  for (uint64_t &limb : input.pget(0, 0)->limbs) {
+    limb = 0x0032131ffffffffff;
+  }
+
+  Memref<i256> result(1, 1);
+  for (auto _ : state) {
+    _mlir_ciface_mul(&result, &input);
+  }
+}
+
+BENCHMARK(BM_mul_benchmark);
+
+// Times _mlir_ciface_mont_mul on a Memref<i256>(1, 1) input.
+void BM_mont_mul_benchmark(::benchmark::State &state) {
+  Memref<i256> input(1, 1);
+  // Give all four limbs of element (0, 0) the same constant.
+  for (uint64_t &limb : input.pget(0, 0)->limbs) {
+    limb = 0x0032131ffffffffff;
+  }
+
+  Memref<i256> result(1, 1);
+  for (auto _ : state) {
+    _mlir_ciface_mont_mul(&result, &input);
+  }
+}
+
+BENCHMARK(BM_mont_mul_benchmark);
+
+}  // namespace
+}  // namespace zkir
+
+// Run on (14 X 24 MHz CPU s)
+// CPU Caches:
+//   L1 Data 64 KiB
+//   L1 Instruction 128 KiB
+//   L2 Unified 4096 KiB (x14)
+// Load Average: 7.70, 6.06, 6.06
+// ----------------------------------------------------------------
+// Benchmark                      Time             CPU   Iterations
+// ----------------------------------------------------------------
+// BM_mul_benchmark            2575 ns         2457 ns       294375
+// BM_mont_mul_benchmark       30.9 ns         30.2 ns     23041778
@@ -6,6 +6,10 @@
 #root_elem = #field.pf_elem<17220337697351015657950521176323262483320249231368149235373741788599650842711:i256> : !coeff_ty
 #root = #poly.primitive_root<root=#root_elem, degree=1048576:i256>
 
+!mod = !mod_arith.int<21888242871839275222246405745257275088548364400416034343698204186575808495617 : i256>
+#mont = #mod_arith.montgomery<!mod>
+#root_mont = #poly.primitive_root<root=#root_elem, degree=1048576:i256, montgomery=#mont>
+
 func.func @input_generation() -> !poly_ty attributes { llvm.emit_c_interface } {
   %c42 = arith.constant 6420 : i256
   %full = tensor.splat %c42 : !intt_ty
@@ -25,3 +29,15 @@ func.func @intt(%arg0 : !intt_ty) -> !poly_ty attributes { llvm.emit_c_interface
   %1 = poly.intt %0 {root=#root} : !coefft_ty -> !poly_ty
   return %1 :!poly_ty
 }
+
+func.func @ntt_mont(%arg0 : !poly_ty) -> !intt_ty attributes { llvm.emit_c_interface } {
+  %0 = poly.ntt %arg0 {root=#root_mont} : !poly_ty -> !coefft_ty  // #root_mont is the root attribute that carries montgomery=#mont
+  %1 = field.pf.extract %0 : !coefft_ty -> !intt_ty  // unwrap the field elements for the caller
+  return %1 : !intt_ty
+}
+
+func.func @intt_mont(%arg0 : !intt_ty) -> !poly_ty attributes { llvm.emit_c_interface } {
+  %0 = field.pf.encapsulate %arg0 : !intt_ty -> !coefft_ty  // wrap the raw input as field elements
+  %1 = poly.intt %0 {root=#root_mont} : !coefft_ty -> !poly_ty  // #root_mont is the root attribute that carries montgomery=#mont
+  return %1 :!poly_ty
+}
@@ -17,6 +17,11 @@ extern "C" void _mlir_ciface_input_generation(Memref<i256> *output);
 extern "C" void _mlir_ciface_ntt(Memref<i256> *output, Memref<i256> *input);
 extern "C" void _mlir_ciface_intt(Memref<i256> *output, Memref<i256> *input);
 
+extern "C" void _mlir_ciface_ntt_mont(Memref<i256> *output,
+                                      Memref<i256> *input);
+extern "C" void _mlir_ciface_intt_mont(Memref<i256> *output,
+                                       Memref<i256> *input);
+
 void BM_ntt_benchmark(::benchmark::State &state) {
   Memref<i256> input(1, DEGREE);
   _mlir_ciface_input_generation(&input);
@@ -61,17 +66,66 @@ void BM_intt_benchmark(::benchmark::State &state) {
 // modifying the input. But I am not sure why ;(
 BENCHMARK(BM_intt_benchmark)->Iterations(1)->Unit(::benchmark::kSecond);
 
+void BM_ntt_mont_benchmark(::benchmark::State &state) {
+  Memref<i256> input(1, DEGREE);
+  _mlir_ciface_input_generation(&input);
+  // Benchmark loop: only the forward _mlir_ciface_ntt_mont call is inside it.
+  Memref<i256> ntt(1, DEGREE);
+  for (auto _ : state) {
+    _mlir_ciface_ntt_mont(&ntt, &input);
+  }
+  // Outside the loop: run the inverse transform once on the NTT output.
+  Memref<i256> intt(1, DEGREE);
+  _mlir_ciface_intt_mont(&intt, &ntt);
+  // Round-trip check: each limb of the INTT result must equal the input limb.
+  for (int i = 0; i < DEGREE; i++) {
+    for (int j = 0; j < 4; j++) {
+      EXPECT_EQ(intt.pget(0, i)->limbs[j], input.pget(0, i)->limbs[j]);
+    }
+  }
+}
+
+BENCHMARK(BM_ntt_mont_benchmark)->Unit(::benchmark::kSecond);
+
+void BM_intt_mont_benchmark(::benchmark::State &state) {
+  Memref<i256> input(1, DEGREE);
+  _mlir_ciface_input_generation(&input);
+  // Setup outside the loop: compute the forward NTT once as the INTT input.
+  Memref<i256> ntt(1, DEGREE);
+  _mlir_ciface_ntt_mont(&ntt, &input);
+  // Benchmark loop: only the inverse _mlir_ciface_intt_mont call is inside it.
+  Memref<i256> intt(1, DEGREE);
+  for (auto _ : state) {
+    _mlir_ciface_intt_mont(&intt, &ntt);
+  }
+  // Round-trip check: each limb of the INTT result must equal the input limb.
+  for (int i = 0; i < DEGREE; i++) {
+    for (int j = 0; j < 4; j++) {
+      EXPECT_EQ(intt.pget(0, i)->limbs[j], input.pget(0, i)->limbs[j]);
+    }
+  }
+}
+
+// FIXME(batzor): This fails with more than 1 iteration, so the input seems to
+// be getting modified, but the cause has not been identified yet.
+BENCHMARK(BM_intt_mont_benchmark)->Iterations(1)->Unit(::benchmark::kSecond);
+
 }  // namespace
 }  // namespace zkir
 
+// clang-format off
+// NOLINTBEGIN(whitespace/line_length)
 // Run on (14 X 24 MHz CPU s)
 // CPU Caches:
-// L1 Data 64 KiB
-// L1 Instruction 128 KiB
-// L2 Unified 4096 KiB (x14)
-// Load Average: 22.54, 38.87, 26.62
-// -------------------------------------------------------------------------
-// Benchmark                               Time             CPU   Iterations
-// -------------------------------------------------------------------------
-// BM_ntt_benchmark                    0.321 s         0.320 s             2
-// BM_intt_benchmark/iterations:1      0.475 s         0.473 s             1
+//   L1 Data 64 KiB
+//   L1 Instruction 128 KiB
+//   L2 Unified 4096 KiB (x14)
+// Load Average: 27.66, 13.59, 9.67
+// ------------------------------------------------------------------------------
+// Benchmark                                    Time             CPU   Iterations
+// ------------------------------------------------------------------------------
+// BM_ntt_benchmark                         0.190 s         0.183 s             4
+// BM_intt_benchmark/iterations:1           0.381 s         0.368 s             1
+// BM_ntt_mont_benchmark                    0.221 s         0.214 s             3
+// BM_intt_mont_benchmark/iterations:1      0.415 s         0.396 s             1
+// NOLINTEND()
@@ -3,6 +3,9 @@
 !PFv = tensor<4x!PF1>
 #elem = #field.pf_elem<31:i32> : !PF1
 
+!mod = !mod_arith.int<3 : i32>
+#mont = #mod_arith.montgomery<!mod>
+
 // CHECK-LABEL: @test_lower_constant
 // CHECK-SAME: () -> [[T:.*]] {
 func.func @test_lower_constant() -> !PF1 {
@@ -52,6 +55,46 @@ func.func @test_lower_extract_vec(%lhs : tensor<4x!PF1>) -> tensor<4xi32> {
   return %res : tensor<4xi32>
 }
 
+// CHECK-LABEL: @test_lower_to_mont
+// CHECK-SAME: (%[[LHS:.*]]: [[T:.*]]) -> [[T]] {
+func.func @test_lower_to_mont(%lhs : !PF1) -> !PF1 {
+  // CHECK-NOT: field.pf.to_mont
+  // CHECK: %[[RES:.*]] = mod_arith.to_mont %[[LHS]] {montgomery = #mod_arith.montgomery<[[T]]>} : [[T]]
+  %res = field.pf.to_mont %lhs {montgomery=#mont} : !PF1  // op under test; expected to become mod_arith.to_mont on the argument
+  // CHECK: return %[[RES]] : [[T]]
+  return %res : !PF1
+}
+
+// CHECK-LABEL: @test_lower_to_mont_vec
+// CHECK-SAME: (%[[LHS:.*]]: [[T:.*]]) -> [[T]] {
+func.func @test_lower_to_mont_vec(%lhs : !PFv) -> !PFv {
+  // CHECK-NOT: field.pf.to_mont
+  // CHECK: %[[RES:.*]] = mod_arith.to_mont %[[LHS]] {montgomery = #mod_arith.montgomery<[[E:.*]]>} : [[T]]
+  %res = field.pf.to_mont %lhs {montgomery=#mont} : !PFv  // tensor variant (!PFv = tensor<4x!PF1>) of the op under test
+  // CHECK: return %[[RES]] : [[T]]
+  return %res : !PFv
+}
+
+// CHECK-LABEL: @test_lower_from_mont
+// CHECK-SAME: (%[[LHS:.*]]: [[T:.*]]) -> [[T]] {
+func.func @test_lower_from_mont(%lhs : !PF1) -> !PF1 {
+  // CHECK-NOT: field.pf.from_mont
+  // CHECK: %[[RES:.*]] = mod_arith.from_mont %[[LHS]] {montgomery = #mod_arith.montgomery<[[T]]>} : [[T]]
+  %res = field.pf.from_mont %lhs {montgomery=#mont} : !PF1  // op under test; expected to become mod_arith.from_mont on the argument
+  // CHECK: return %[[RES]] : [[T]]
+  return %res : !PF1
+}
+
+// CHECK-LABEL: @test_lower_from_mont_vec
+// CHECK-SAME: (%[[LHS:.*]]: [[T:.*]]) -> [[T]] {
+func.func @test_lower_from_mont_vec(%lhs : !PFv) -> !PFv {
+  // CHECK-NOT: field.pf.from_mont
+  // CHECK: %[[RES:.*]] = mod_arith.from_mont %[[LHS]] {montgomery = #mod_arith.montgomery<[[E:.*]]>} : [[T]]
+  %res = field.pf.from_mont %lhs {montgomery=#mont} : !PFv  // tensor variant (!PFv = tensor<4x!PF1>) of the op under test
+  // CHECK: return %[[RES]] : [[T]]
+  return %res : !PFv
+}
+
 // CHECK-LABEL: @test_lower_inverse
 // CHECK-SAME: (%[[LHS:.*]]: [[T:.*]]) -> [[T]] {
 func.func @test_lower_inverse(%lhs : !PF1) -> !PF1 {
@@ -135,6 +178,27 @@ func.func @test_lower_mul_vec(%lhs : !PFv, %rhs : !PFv) -> !PFv {
   return %res : !PFv
 }
 
+// CHECK-LABEL: @test_lower_mont_mul
+// CHECK-SAME: () -> [[T:.*]] {
+func.func @test_lower_mont_mul() -> !PF1 {
+  // CHECK: %[[C0:.*]] = mod_arith.constant 2 : [[T]]
+  %c0 = field.pf.constant 2 : !PF1
+  // CHECK: %[[RES:.*]] = mod_arith.mont_mul %[[C0]], %[[C0]] {montgomery = #mod_arith.montgomery<[[T]]>} : [[T]]
+  %res = field.pf.mont_mul %c0, %c0 {montgomery = #mont} : !PF1  // op under test; both operands are the same constant
+  // CHECK: return %[[RES]] : [[T]]
+  return %res : !PF1
+}
+
+// CHECK-LABEL: @test_lower_mont_mul_vec
+// CHECK-SAME: (%[[LHS:.*]]: [[T:.*]], %[[RHS:.*]]: [[T]]) -> [[T]] {
+func.func @test_lower_mont_mul_vec(%lhs : !PFv, %rhs : !PFv) -> !PFv {
+  // CHECK-NOT: field.pf.mont_mul
+  // CHECK: %[[RES:.*]] = mod_arith.mont_mul %[[LHS]], %[[RHS]] {montgomery = #mod_arith.montgomery<[[E:.*]]>}  : [[T]]
+  %res = field.pf.mont_mul %lhs, %rhs {montgomery = #mont} : !PFv  // tensor variant (!PFv = tensor<4x!PF1>) of the op under test
+  // CHECK: return %[[RES]] : [[T]]
+  return %res : !PFv
+}
+
 // CHECK-LABEL: @test_lower_constant_tensor
 // CHECK-SAME: () -> [[T:.*]] {
 func.func @test_lower_constant_tensor() -> !PFv {
 
@@ -1,8 +1,18 @@
-// RUN: zkir-opt %s --mod-arith-to-arith -convert-elementwise-to-linalg --one-shot-bufferize --convert-scf-to-cf --convert-cf-to-llvm --convert-to-llvm \
+// RUN: zkir-opt %s --mod-arith-to-arith -convert-elementwise-to-linalg --one-shot-bufferize --convert-scf-to-cf --convert-cf-to-llvm --convert-to-llvm --convert-vector-to-llvm \
 // RUN:   | mlir-runner -e test_lower_inverse -entry-point-result=void \
 // RUN:      --shared-libs="%mlir_lib_dir/libmlir_runner_utils%shlibext" > %t
 // RUN: FileCheck %s --check-prefix=CHECK_TEST_INVERSE < %t
 
+// RUN: zkir-opt %s --mod-arith-to-arith -convert-elementwise-to-linalg --one-shot-bufferize --convert-scf-to-cf --convert-cf-to-llvm --convert-to-llvm --convert-vector-to-llvm \
+// RUN:   | mlir-runner -e test_lower_mont_reduce -entry-point-result=void \
+// RUN:      --shared-libs="%mlir_lib_dir/libmlir_runner_utils%shlibext" > %t
+// RUN: FileCheck %s --check-prefix=CHECK_TEST_MONT_REDUCE < %t
+
+// RUN: zkir-opt %s --mod-arith-to-arith -convert-elementwise-to-linalg --one-shot-bufferize --convert-scf-to-cf --convert-cf-to-llvm --convert-to-llvm --convert-vector-to-llvm \
+// RUN:   | mlir-runner -e test_lower_mont_mul -entry-point-result=void \
+// RUN:      --shared-libs="%mlir_lib_dir/libmlir_runner_utils%shlibext" > %t
+// RUN: FileCheck %s --check-prefix=CHECK_TEST_MONT_MUL < %t
+
 !Fr = !mod_arith.int<2147483647:i32>
 
 func.func private @printMemrefI32(memref<*xi32>) attributes { llvm.emit_c_interface }
@@ -20,3 +30,44 @@ func.func @test_lower_inverse() {
 }
 
 // CHECK_TEST_INVERSE: [1324944920]
+
+!Fq = !mod_arith.int<21888242871839275222246405745257275088548364400416034343698204186575808495617 : i256>
+#Fq_mont = #mod_arith.montgomery<!Fq>
+
+func.func @test_lower_mont_reduce() {
+  %p = arith.constant 3723 : i512
+  %p_mont = mod_arith.mont_reduce %p {montgomery=#Fq_mont} : i512 -> !Fq
+  // Reinterpret the i256 result as eight i32 words and store them to a buffer.
+  %2 = mod_arith.extract %p_mont : !Fq -> i256
+  %3 = vector.from_elements %2 : vector<1xi256>
+  %4 = vector.bitcast %3 : vector<1xi256> to vector<8xi32>
+  %mem = memref.alloc() : memref<8xi32>
+  %idx_0 = arith.constant 0 : index
+  vector.store %4, %mem[%idx_0] : memref<8xi32>, vector<8xi32>
+  // Print the buffer; the RUN lines pipe this output into FileCheck.
+  %U = memref.cast %mem : memref<8xi32> to memref<*xi32>
+  func.call @printMemrefI32(%U) : (memref<*xi32>) -> ()
+  return
+}
+
+// CHECK_TEST_MONT_REDUCE: [-1635059004, -1772563805, -2074116324, -156049350, 156881531, -524227392, -1359481138, 438709201]
+
+func.func @test_lower_mont_mul() {
+  %p = mod_arith.constant 17221657567640823606390383439573883756117969501024189775361 : !Fq
+  %p_mont = mod_arith.to_mont %p {montgomery=#Fq_mont} : !Fq
+  %p_mont_sq = mod_arith.mont_mul %p_mont, %p_mont {montgomery=#Fq_mont} : !Fq
+  %p_sq = mod_arith.from_mont %p_mont_sq {montgomery=#Fq_mont} : !Fq  // presumably p * p in standard form, as the name suggests
+  // Reinterpret the i256 result as eight i32 words and store them to a buffer.
+  %2 = mod_arith.extract %p_sq : !Fq -> i256
+  %3 = vector.from_elements %2 : vector<1xi256>
+  %4 = vector.bitcast %3 : vector<1xi256> to vector<8xi32>
+  %mem = memref.alloc() : memref<8xi32>
+  %idx_0 = arith.constant 0 : index
+  vector.store %4, %mem[%idx_0] : memref<8xi32>, vector<8xi32>
+  // Print the buffer; the RUN lines pipe this output into FileCheck.
+  %U = memref.cast %mem : memref<8xi32> to memref<*xi32>
+  func.call @printMemrefI32(%U) : (memref<*xi32>) -> ()
+  return
+}
+
+// CHECK_TEST_MONT_MUL: [-1717936988, -857005375, 1976922116, -1939796685, 1587159113, 557631023, 126776667, 742573744]