Skip to content

Commit 4565d96

Browse files
committed
feat: benchmarks and docu
1 parent 35c25d5 commit 4565d96

File tree

10 files changed

+930
-199
lines changed

10 files changed

+930
-199
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,5 @@ jobs:
6363
ctest -j ${{env.parallel_processes}} -T memcheck -C ${{matrix.build_type}} --test-dir submission_25_05_08 --output-on-failure
6464
ctest -j ${{env.parallel_processes}} -T memcheck -C ${{matrix.build_type}} --test-dir submission_25_05_15 --output-on-failure
6565
ctest -j ${{env.parallel_processes}} -T memcheck -C ${{matrix.build_type}} --test-dir submission_25_05_22 --output-on-failure
66-
ctest -j ${{env.parallel_processes}} -T memcheck -C ${{matrix.build_type}} --output-on-failure -E "^Test *(gemm generation|unary|tensor operation with outer loop)"
66+
ctest -j ${{env.parallel_processes}} -T memcheck -C ${{matrix.build_type}} --output-on-failure -E "^Test *(gemm generation|unary)"
6767

CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,10 @@ set(TEST_ARM_INSTRUCTION_FILES
228228
)
229229

230230
set(BENCH_FILES
231+
TensorOperation.bench.cpp
232+
)
233+
234+
set(BENCH_KERNELS_FILES
231235
matmul_16_6_1.bench.cpp
232236
matmul_16_6_k.bench.cpp
233237
matmul.bench.cpp
@@ -261,8 +265,12 @@ endforeach()
261265
foreach(file ${TEST_ARM_INSTRUCTION_FILES})
262266
list(APPEND TEST_FILEPATHS src/test/arm_instructions/${file})
263267
endforeach()
264-
268+
265269
foreach(file ${BENCH_FILES})
270+
list(APPEND BENCH_FILEPATHS src/test/${file})
271+
endforeach()
272+
273+
foreach(file ${BENCH_KERNELS_FILES})
266274
list(APPEND BENCH_FILEPATHS src/test/kernels/${file})
267275
endforeach()
268276

docs_sphinx/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Machine Learning Compilers
2525
submissions/report_25_05_08.rst
2626
submissions/report_25_05_15.rst
2727
submissions/report_25_05_22.rst
28+
submissions/report_25_05_29.rst
2829

2930
.. toctree::
3031
:maxdepth: 4

docs_sphinx/submissions/report_25_05_29.rst

Lines changed: 309 additions & 1 deletion
Large diffs are not rendered by default.

src/main/TensorOperation.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,6 @@ mini_jit::TensorOperation::error_t mini_jit::TensorOperation::setup(dtype_t dtyp
448448
{
449449
main_kernel.emplace<Unary>();
450450
TensorOperation::prim_main = prim_main;
451-
indexPrimK = indexPrimN;
452451

453452
Unary::error_t error = generateUnary(std::get<Unary>(main_kernel), prim_main, dim_sizes);
454453

@@ -525,7 +524,6 @@ void mini_jit::TensorOperation::execute_dimension(int64_t index_dim, char const
525524
int64_t stride_in1 = isUnary(prim_main) ? 1 : strides_in1[index_dim];
526525
int64_t stride_out = strides_out[index_dim];
527526

528-
std::cout << "Execute check " << index_dim + 1 << " " << std::endl;
529527
if (exec_types[index_dim] == exec_t::seq)
530528
{
531529
release_assert(exec_types[index_dim] == exec_t::seq, "Expected a sequential loop");
@@ -535,7 +533,7 @@ void mini_jit::TensorOperation::execute_dimension(int64_t index_dim, char const
535533

536534
for (int64_t iDim = 0; iDim < dim_size; iDim++)
537535
{
538-
if (dim_types[iDim] == dim_t::k)
536+
if (dim_types[index_dim] == dim_t::k)
539537
{
540538
is_first = first_access && (iDim == 0);
541539
is_last = last_access && (iDim == (dim_size - 1));
@@ -556,7 +554,6 @@ void mini_jit::TensorOperation::execute_dimension(int64_t index_dim, char const
556554
{
557555
if (std::holds_alternative<Unary>(first_touch))
558556
{
559-
std::cout << "First touch: indexPrimN" << indexPrimN << " " << strides_out[indexPrimN] << std::endl;
560557
Unary::kernel_t kernel = std::get<Unary>(first_touch).get_kernel();
561558
kernel(ptr_out, ptr_out, strides_out[indexPrimN], strides_out[indexPrimN]);
562559
}
@@ -571,14 +568,11 @@ void mini_jit::TensorOperation::execute_dimension(int64_t index_dim, char const
571568
{
572569
if (std::holds_alternative<Unary>(main_kernel))
573570
{
574-
std::cout << "Unary: indexPrimN " << indexPrimN << " " << strides_in0[indexPrimN] << " " << strides_out[indexPrimN] << std::endl;
575571
Unary::kernel_t kernel = std::get<Unary>(main_kernel).get_kernel();
576572
kernel(ptr_in0, ptr_out, strides_in0[indexPrimN], strides_out[indexPrimN]);
577573
}
578574
else if (std::holds_alternative<Brgemm>(main_kernel))
579575
{
580-
std::cout << "Gemm: indexPrimN " << indexPrimN << " " << "indexPrimK " << indexPrimK << " " << "indexPrimBatch " << indexPrimBatch
581-
<< " " << strides_in0[indexPrimK] << " " << strides_in1[indexPrimN] << " " << strides_out[indexPrimN] << std::endl;
582576
Brgemm::kernel_t kernel = std::get<Brgemm>(main_kernel).get_kernel();
583577

584578
if (prim_main == prim_t::gemm)
@@ -606,8 +600,6 @@ void mini_jit::TensorOperation::execute_dimension(int64_t index_dim, char const
606600
{
607601
if (std::holds_alternative<Unary>(last_touch))
608602
{
609-
std::cout << "Last touch: indexPrimK" << indexPrimK << " " << strides_in0[indexPrimK] << " " << strides_out[indexPrimN]
610-
<< std::endl;
611603
Unary::kernel_t kernel = std::get<Unary>(last_touch).get_kernel();
612604
kernel(ptr_out, ptr_out, strides_out[indexPrimN], strides_out[indexPrimN]);
613605
}

src/main/Unary.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,12 @@ mini_jit::Unary::kernel_t mini_jit::Unary::get_kernel() const
7676

7777
void mini_jit::Unary::fill_with_zero_unary_column_major_fp32(uint32_t m, uint32_t n)
7878
{
79-
std::cout << "1: zero" << std::endl;
8079
kernels::unary_zero(native_kernel, m / 16, n, m % 16); // logic of zero_16m_n combined with rest processing
8180
return;
8281
}
8382

8483
void mini_jit::Unary::identity_unary_fp32(uint32_t m, uint32_t n, uint32_t trans_b)
8584
{
86-
std::cout << "1: identity" << std::endl;
8785
if (trans_b == 1)
8886
{
8987
kernels::unary_identity_transpose(native_kernel, m, n);
@@ -97,7 +95,6 @@ void mini_jit::Unary::identity_unary_fp32(uint32_t m, uint32_t n, uint32_t trans
9795

9896
void mini_jit::Unary::relu_unary_fp32(uint32_t m, uint32_t n, uint32_t trans_b)
9997
{
100-
std::cout << "1: relu" << std::endl;
10198
if (trans_b == 1)
10299
{
103100
kernels::unary_relu_transpose(native_kernel, m, n);

src/test/BaseGeneration.test.cpp

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ void GenerationTest::fill_counting_matrix(float *matrix, uint32_t size)
3535
void GenerationTest::naive_matmul_M_N_K_Batch(const float *__restrict__ a, const float *__restrict__ b, float *__restrict__ c, int64_t lda,
3636
int64_t ldb, int64_t ldc, int64_t batch_stride_a, int64_t batch_stride_b)
3737
{
38+
REQUIRE(isSetup == true);
3839
for (size_t iB = 0; iB < BatchSize; iB++)
3940
{
4041
for (size_t iM = 0; iM < M; iM++)
@@ -71,7 +72,6 @@ GenerationTest::GenerationTest(uint32_t M, uint32_t N, uint32_t K) : GenerationT
7172
{
7273
}
7374

74-
7575
GenerationTest::GenerationTest(uint32_t M, uint32_t N, uint32_t K, uint32_t lda, uint32_t ldb, uint32_t ldc)
7676
: GenerationTest(M, N, K, 1, lda, ldb, ldc, lda * K, ldb * N)
7777
{
@@ -122,56 +122,56 @@ void GenerationTest::SetUp(TestInfill fillType)
122122
}
123123

124124
std::copy(matrix_c.begin(), matrix_c.end(), matrix_c_verify.begin());
125+
isSetup = true;
125126
}
126127

127128
void GenerationTest::naive_unary_M_N(const float *a, float *b, int64_t lda, int64_t ldb, bool trans_b, UnaryType type)
128129
{
129-
for (size_t iK = 0; iK < K; iK++)
130+
REQUIRE(isSetup == true);
131+
132+
for (size_t iN = 0; iN < N; iN++)
130133
{
131-
for (size_t iN = 0; iN < N; iN++)
134+
for (size_t iM = 0; iM < M; iM++)
132135
{
133-
for (size_t iM = 0; iM < M; iM++)
136+
switch (type)
134137
{
135-
switch (type)
138+
case UnaryType::Zero:
139+
if (trans_b == true)
140+
{
141+
b[ldb * iM + iN] = 0;
142+
}
143+
else
144+
{
145+
b[ldb * iN + iM] = 0;
146+
}
147+
148+
break;
149+
150+
case UnaryType::Identity:
151+
if (trans_b == true)
136152
{
137-
case UnaryType::Zero:
138-
if (trans_b == true)
139-
{
140-
b[ldb * iM + iN] = 0;
141-
}
142-
else
143-
{
144-
b[ldb * iN + iM] = 0;
145-
}
146-
147-
break;
148-
149-
case UnaryType::Identity:
150-
if (trans_b == true)
151-
{
152-
b[ldb * iM + iN] = a[lda * iK + iM];
153-
}
154-
else
155-
{
156-
b[ldb * iN + iM] = a[lda * iK + iM];
157-
}
158-
break;
159-
160-
case UnaryType::ReLu:
161-
if (trans_b == true)
162-
{
163-
b[ldb * iM + iN] = std::max(a[lda * iK + iM], 0.f);
164-
}
165-
else
166-
{
167-
b[ldb * iN + iM] = std::max(a[lda * iK + iM], 0.f);
168-
}
169-
break;
170-
171-
default:
172-
FAIL("Found unary invalid type for testing");
173-
break;
153+
b[ldb * iM + iN] = a[lda * iN + iM];
174154
}
155+
else
156+
{
157+
b[ldb * iN + iM] = a[lda * iN + iM];
158+
}
159+
break;
160+
161+
case UnaryType::ReLu:
162+
if (trans_b == true)
163+
{
164+
b[ldb * iM + iN] = std::max(a[lda * iN + iM], 0.f);
165+
}
166+
else
167+
{
168+
b[ldb * iN + iM] = std::max(a[lda * iN + iM], 0.f);
169+
}
170+
break;
171+
172+
default:
173+
FAIL("Found unary invalid type for testing");
174+
break;
175175
}
176176
}
177177
}

src/test/BaseGeneration.test.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class GenerationTest
4141
std::vector<float> matrix_c;
4242
std::vector<float> matrix_c_verify;
4343
mini_jit::Brgemm::kernel_t kernel = nullptr;
44+
bool isSetup = false;
4445

4546
/**
4647
* @brief Fills the given matrix with random values.

0 commit comments

Comments
 (0)