Skip to content

Commit 4a86921

Browse files
committed
fix: unary_zero_16m
1 parent f2a2cfb commit 4a86921

File tree

7 files changed

+22
-22
lines changed

7 files changed

+22
-22
lines changed

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ set(KERNEL_FILES
110110
br_matmul_lt16_lt4nRest_k.cpp
111111

112112
unary/unary_all.h
113-
unary/unary_zero_4m_n.h
114-
unary/unary_zero_4m_n.cpp
113+
unary/unary_zero_16m_n.h
114+
unary/unary_zero_16m_n.cpp
115115
)
116116

117117
set(ARM_INSTRUCTION_FILES
@@ -174,7 +174,7 @@ set(TEST_KERNELS
174174

175175
unary/unary.test.h
176176
unary/unary.test.cpp
177-
unary/unary_zero_4m_n.test.cpp
177+
unary/unary_zero_16m_n.test.cpp
178178
)
179179

180180
set(TEST_ARM_INSTRUCTION_FILES

src/main/kernels/unary/unary_all.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#ifndef MINI_JIT_KERNELS_UNARY_ALL_H
22
#define MINI_JIT_KERNELS_UNARY_ALL_H
33

4-
#include "unary_zero_4m_n.h"
4+
#include "unary_zero_16m_n.h"
55

66
#endif // MINI_JIT_KERNELS_UNARY_ALL_H

src/main/kernels/unary/unary_zero_4m_n.cpp renamed to src/main/kernels/unary/unary_zero_16m_n.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
#include "unary_zero_4m_n.h"
1+
#include "unary_zero_16m_n.h"
22
#include "../../arm_instructions/arm_all.h"
33

4-
void mini_jit::kernels::unary_zero_4m_n(mini_jit::Kernel &kernel, const uint32_t m_loop_4, const uint32_t n_loop,
5-
const bool use_init_and_end)
4+
void mini_jit::kernels::unary_zero_16m_n(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop,
5+
const bool use_init_and_end)
66
{
77
using namespace mini_jit::arm_instructions;
88

9-
release_assert(m_loop_4 != 0, "Cannot use a matrix with a m loop of size zero.");
9+
release_assert(m_loop_16 != 0, "Cannot use a matrix with a m loop of size zero.");
1010
release_assert(n_loop != 0, "Cannot use a matrix with a n loop of size zero.");
1111

1212
if (use_init_and_end)
@@ -40,8 +40,8 @@ void mini_jit::kernels::unary_zero_4m_n(mini_jit::Kernel &kernel, const uint32_t
4040
// Offset the used leading dimension by the size of floats
4141
lsl(x3, x3, 2), // x3 * 4 = x3 * sizeof(float)
4242

43-
mov(x8, x1), // Store the inital value of x1, to be restored in the N loop
44-
mov(x9, 4 * 16), // 4 * 16Byte Hold the number of bytes that are stored in the loop
43+
mov(x8, x1), // Store the initial value of x1, to be restored in the N loop
44+
mov(x9, 4 * 4 * 4), // 4 * 4 * sizeof(float) Hold the number of bytes that are stored in the loop
4545

4646
// Zero four registers so we can fill the matrix with zeros
4747
eor(v0, t16b, v0, t16b, v0, t16b), // Zero the v0 register
@@ -57,10 +57,10 @@ void mini_jit::kernels::unary_zero_4m_n(mini_jit::Kernel &kernel, const uint32_t
5757
// loop over n
5858
sub(x16, x16, 1),
5959

60-
mov(x1, x9), // Restore x1 for the m loop
60+
mov(x1, x8), // Restore x1 for the m loop
6161

6262
// x17 iterator for the m_loop
63-
mov(x17, m_loop_4),
63+
mov(x17, m_loop_16),
6464
// loop over m
6565
sub(x17, x17, 1),
6666

@@ -70,7 +70,7 @@ void mini_jit::kernels::unary_zero_4m_n(mini_jit::Kernel &kernel, const uint32_t
7070
cbnz(x17, -2 * 4),
7171

7272
// Updates for the matrix B
73-
add(x9, x3, x9), // lda + initial position
73+
add(x8, x3, x8), // lda + initial position
7474

7575
// loop back to n
7676
cbnz(x16, -7 * 4),
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef MINI_JIT_KERNELS_UNARY_ZERO_M_4N_H
2-
#define MINI_JIT_KERNELS_UNARY_ZERO_M_4N_H
1+
#ifndef MINI_JIT_KERNELS_UNARY_ZERO_M_16N_H
2+
#define MINI_JIT_KERNELS_UNARY_ZERO_M_16N_H
33

44
#include "../../Kernel.h"
55
#include <cstdint>
@@ -12,11 +12,11 @@ namespace mini_jit
1212
* @brief Generates a 16*M x N unary zero kernel.
1313
*
1414
* @param kernel The kernel to add instructions to.
15-
* @param m_loop_4 The repetitions of the m block of size.
15+
* @param m_loop_16 The repetitions of the m block of size 16.
1616
* @param n_loop The repetitions of the n block of size 1.
1717
* @param use_init_and_end Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to true.
1818
*/
19-
void unary_zero_4m_n(mini_jit::Kernel &kernel, const uint32_t m_loop_4, const uint32_t n_loop, const bool use_init_and_end = true);
19+
void unary_zero_16m_n(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop, const bool use_init_and_end = true);
2020

2121
} // namespace kernels
2222
} // namespace mini_jit
-68 Bytes
Binary file not shown.
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
1-
#include "../../../main/kernels/unary/unary_zero_4m_n.h"
1+
#include "../../../main/kernels/unary/unary_zero_16m_n.h"
22
#include "unary.test.h"
33
#include <catch2/catch_test_macros.hpp>
44
#include <catch2/generators/catch_generators.hpp>
55
#include <catch2/generators/catch_generators_range.hpp>
66
#include <cstdint>
77

8-
TEST_CASE("Test unary zero_4m_n jited correctness random data", "[jit][correctness][gemm]")
8+
TEST_CASE("Test unary zero_16m_n jited correctness random data", "[jit][correctness][gemm]")
99
{
10-
const uint32_t M = GENERATE(16, 64, 512, 2048);
11-
const uint32_t N = GENERATE(16, 50, 64, 512, 2048);
10+
const uint32_t M = GENERATE(64, 512, 2048);
11+
const uint32_t N = GENERATE(50, 64, 512, 2048);
1212
CAPTURE(M, N);
1313
UnaryTestFixture unaryTest(M, N);
1414
unaryTest.SetUp(TestInfill::Random);
15-
mini_jit::kernels::unary_zero_4m_n(unaryTest.native_kernel, M / 4, N);
15+
mini_jit::kernels::unary_zero_16m_n(unaryTest.native_kernel, M / 16, N);
1616
unaryTest.RunTest(M, M, UnaryType::Zero);
1717
}
-68 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)