diff --git a/aie_kernels/aie2p/mm.cc b/aie_kernels/aie2p/mm.cc index 0295088e639..14ab45e934e 100644 --- a/aie_kernels/aie2p/mm.cc +++ b/aie_kernels/aie2p/mm.cc @@ -16,6 +16,7 @@ #define REL_WRITE 0 #define REL_READ 1 +#include "../aie_kernel_utils.h" #include #include "zero.cc" @@ -74,14 +75,15 @@ static inline void matmul_vectorized_2x2_mmul(const T_in *__restrict pA, event0(); - for (unsigned z = 0; z < rowA; z += 2) - chess_prepare_for_pipelining chess_loop_range(4, ) { + AIE_PREPARE_FOR_PIPELINING + AIE_LOOP_MIN_ITERATION_COUNT(4) + for (unsigned z = 0; z < rowA; z += 2) { T_out *__restrict pC1 = pC + (z * colB) * MMUL::size_C; T_out *__restrict pC2 = pC + ((z + 1) * colB) * MMUL::size_C; for (unsigned j = 0; j < colB; j += 2) #ifdef OPT_PERF_ENABLED - chess_flatten_loop + AIE_LOOP_FLATTEN #endif { const T_in *__restrict pA1 = pA + (z * colA) * MMUL::size_A; @@ -119,7 +121,7 @@ static inline void matmul_vectorized_2x2_mmul(const T_in *__restrict pA, for (unsigned i = 0; i < colA; ++i) #ifdef OPT_PERF_ENABLED - chess_flatten_loop + AIE_LOOP_FLATTEN #endif { A0 = aie::load_v(pA1); @@ -437,4 +439,4 @@ extern "C" { combos(matmul_vectorized_c_func) combos(matmul_scalar_c_func) combos(zero_vectorized_c_func) combos(zero_scalar_c_func) -} // extern "C" \ No newline at end of file +} // extern "C"