Integer-Ctrl
diff --git a/.clang-format b/.clang-format
Lines changed: 2 additions & 1 deletion
diff --git a/src/main/Brgemm.cpp b/src/main/Brgemm.cpp
Lines changed: 63 additions & 65 deletions
diff --git a/src/main/Brgemm.h b/src/main/Brgemm.h
Lines changed: 6 additions & 18 deletions
@@ -1,12 +1,13 @@
 BasedOnStyle: LLVM
 IndentWidth: 2
 ContinuationIndentWidth: 2
+SpacesBeforeTrailingComments: 2
 UseTab: Never
 BreakBeforeBraces: Allman
 AllowShortIfStatementsOnASingleLine: false
 AllowShortLoopsOnASingleLine: false
 AllowShortFunctionsOnASingleLine: None
-SpacesBeforeParens: ControlStatements
+SpaceBeforeParens: ControlStatements
 AlwaysBreakAfterReturnType: None
 BinPackArguments: true
 BinPackParameters: true
 
@@ -1,87 +1,85 @@
 #include "Brgemm.h"
-#include "kernels/matmuls_all.h"
 #include "Kernel.h"
-#include <stdexcept>
+#include "kernels/matmuls_all.h"
 #include <format>
+#include <stdexcept>
 
-mini_jit::Brgemm::error_t mini_jit::Brgemm::generate(uint32_t m, uint32_t n, uint32_t k, uint32_t br_size,
-    uint32_t trans_a, uint32_t trans_b, uint32_t trans_c, dtype_t  dtype)
+mini_jit::Brgemm::error_t mini_jit::Brgemm::generate(uint32_t m, uint32_t n, uint32_t k, uint32_t br_size, uint32_t trans_a,
+                                                     uint32_t trans_b, uint32_t trans_c, dtype_t dtype)
 {
-    if (dtype != dtype_t::fp32)
-    {
-        return error_t::err_wrong_dtype;
-    }
-    if (m % 16 != 0 || (n < 4))
-    {
-        return error_t::err_wrong_dimension;
-    }
-    if ((trans_a + trans_b + trans_c) != 0)
-    {
-        return error_t::err_row_major_order_not_supported;
-    }
-    if (br_size != 1)
-    {
-        return error_t::err_batch_reduce_size_not_supported;
-    }
+  if (dtype != dtype_t::fp32)
+  {
+    return error_t::err_wrong_dtype;
+  }
+  if (m % 16 != 0 || (n < 4))
+  {
+    return error_t::err_wrong_dimension;
+  }
+  if ((trans_a + trans_b + trans_c) != 0)
+  {
+    return error_t::err_row_major_order_not_supported;
+  }
+  if (br_size != 1)
+  {
+    return error_t::err_batch_reduce_size_not_supported;
+  }
 
-    if (br_size == 1 && (trans_a + trans_b + trans_c) == 0 && dtype == dtype_t::fp32)
-    {
-        fill_with_matmuls_no_batch_dim_column_major_fp32(m, n, k);
-    }
-    else
-    {
-        throw std::logic_error(std::format(
-            "Unhandled parameter combination found: m='{}', n='{}', k='{}', br_size='{}', trans_a='{}', trans_b='{}', "
-            "trans_c = '{}', dtype = '{}'", m, n, k, br_size, trans_a, trans_b, trans_c, static_cast<int32_t>(dtype)));
-    }
+  if (br_size == 1 && (trans_a + trans_b + trans_c) == 0 && dtype == dtype_t::fp32)
+  {
+    fill_with_matmuls_no_batch_dim_column_major_fp32(m, n, k);
+  }
+  else
+  {
+    throw std::logic_error(
+      std::format("Unhandled parameter combination found: m='{}', n='{}', k='{}', br_size='{}', trans_a='{}', trans_b='{}', "
+                  "trans_c = '{}', dtype = '{}'",
+                  m, n, k, br_size, trans_a, trans_b, trans_c, static_cast<int32_t>(dtype)));
+  }
 
+  native_kernel.set_kernel();
+  kernel = reinterpret_cast<kernel_t>(const_cast<void *>(native_kernel.get_kernel()));  // Properly cast from const void* to kernel_t
 
-
-    native_kernel.set_kernel();
-    kernel = reinterpret_cast<kernel_t>(const_cast<void*>(native_kernel.get_kernel()));  // Properly cast from const void* to kernel_t
-
-    return error_t::success;
+  return error_t::success;
 }
 
 mini_jit::Brgemm::kernel_t mini_jit::Brgemm::get_kernel() const
 {
-    return kernel;
+  return kernel;
 }
 
 void mini_jit::Brgemm::fill_with_matmuls_no_batch_dim_column_major_fp32(uint32_t m, uint32_t n, uint32_t k)
 {
-    // Always sort from the specific to the more general case
+  // Always sort from the specific to the more general case
 
-    if (m == 16 && n == 6 && k == 1)
-    {
-        kernels::matmul_16_6_1(native_kernel);
-        return;
-    }
+  if (m == 16 && n == 6 && k == 1)
+  {
+    kernels::matmul_16_6_1(native_kernel);
+    return;
+  }
 
-    if (m == 16 && n == 6)
-    {
-        kernels::matmul_16_6_k(native_kernel, k);
-        return;
-    }
+  if (m == 16 && n == 6)
+  {
+    kernels::matmul_16_6_k(native_kernel, k);
+    return;
+  }
 
-    if (m >= 16 && m % 16 == 0 && n >= 4 && n % 4 == 0)
-    {
-        kernels::matmul_16m_4n_k(native_kernel, m / 16, n / 4, k);
-        return;
-    }
+  if (m >= 16 && m % 16 == 0 && n >= 4 && n % 4 == 0)
+  {
+    kernels::matmul_16m_4n_k(native_kernel, m / 16, n / 4, k);
+    return;
+  }
 
-    if (m >= 16 && m % 16 == 0 && n >= 4)
-    {
-        // At this point n % 4 != 0
-        kernels::matmul_16m_4nRest_k(native_kernel, m / 16, n / 4, k, n % 4);
-        return;
-    }
+  if (m >= 16 && m % 16 == 0 && n >= 4)
+  {
+    // At this point n % 4 != 0
+    kernels::matmul_16m_4nRest_k(native_kernel, m / 16, n / 4, k, n % 4);
+    return;
+  }
 
-    if (m >= 16 && n >= 4 && n % 4 == 0)
-    {
-        // At this point m % 16 != 0
-        kernels::matmul_16mRest_4n_k(native_kernel, m / 16, n / 4, k, m % 16);
-        return;
-    }
+  if (m >= 16 && n >= 4 && n % 4 == 0)
+  {
+    // At this point m % 16 != 0
+    kernels::matmul_16mRest_4n_k(native_kernel, m / 16, n / 4, k, m % 16);
+    return;
+  }
 }
-
@@ -1,12 +1,12 @@
 #ifndef MINI_JIT_BRGEMM_H
 #define MINI_JIT_BRGEMM_H
 
-#include <cstdint>
 #include "Kernel.h"
+#include <cstdint>
 
 namespace mini_jit
 {
-class Brgemm;
+  class Brgemm;
 }
 
 class mini_jit::Brgemm
@@ -25,14 +25,8 @@ class mini_jit::Brgemm
    * - br_stride_a: stride between two A matrices (in elements, not bytes).
    * - br_stride_b: stride between two B matrices (in elements, not bytes).
    */
-  using kernel_t = void (*)(void const* a,
-    void const* b,
-    void* c,
-    int64_t lda,
-    int64_t ldb,
-    int64_t ldc,
-    int64_t br_stride_a,
-    int64_t br_stride_b);
+  using kernel_t = void (*)(void const *a, void const *b, void *c, int64_t lda, int64_t ldb, int64_t ldc, int64_t br_stride_a,
+                            int64_t br_stride_b);
 
   /// data type
   enum class dtype_t : uint32_t
@@ -63,14 +57,8 @@ class mini_jit::Brgemm
    * @param dtype data type of the matrices.
    * @return error_t::success on success, another error_t value otherwise.
    **/
-  error_t generate(uint32_t m,
-    uint32_t n,
-    uint32_t k,
-    uint32_t br_size,
-    uint32_t trans_a,
-    uint32_t trans_b,
-    uint32_t trans_c,
-    dtype_t dtype);
+  error_t generate(uint32_t m, uint32_t n, uint32_t k, uint32_t br_size, uint32_t trans_a, uint32_t trans_b, uint32_t trans_c,
+                   dtype_t dtype);
 
   /**
    * @brief Get the generated kernel: C += sum_i(A_i * B_i).