Skip to content

Commit c50e2e7

Browse files
committed
Fix: Destroy CUDA events
1 parent 91d6c3b commit c50e2e7

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

less_slow.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -769,6 +769,13 @@ static void sorting_with_cub(bm::State &state) {
769 769
state.SetIterationTime(milliseconds / 1000.0f);
770 770
}
771 771

772+
// ! All of the following and above calls can fail, so consider checking the codes
773+
// ! and using a different kernel launch mechanism: https://ashvardanian.com/posts/less-wrong-cuda-hello-world/
774+
cudaEventDestroy(start_event);
775+
cudaEventDestroy(stop_event);
776+
cudaStreamDestroy(sorting_stream);
777+
cudaFree(temporary_pointer);
778+
772 779
state.SetComplexityN(count);
773 780
state.SetItemsProcessed(count * state.iterations());
774 781
state.SetBytesProcessed(count * state.iterations() * sizeof(std::uint32_t));
@@ -1448,7 +1455,7 @@ f32x4x4_t f32x4x4_matmul_unrolled_kernel(f32x4x4_t const &a_matrix, f32x4x4_t co
1448 1455
f32x4x4_t c_matrix;
1449 1456
float const(&a)[4][4] = a_matrix.scalars;
1450 1457
float const(&b)[4][4] = b_matrix.scalars;
1451-
float(&c)[4][4] = c_matrix.scalars;
1458+
float (&c)[4][4] = c_matrix.scalars;
1452 1459

1453 1460
c[0][0] = a[0][0] * b[0][0] + a[0][1] * b[1][0] + a[0][2] * b[2][0] + a[0][3] * b[3][0];
1454 1461
c[0][1] = a[0][0] * b[0][1] + a[0][1] * b[1][1] + a[0][2] * b[2][1] + a[0][3] * b[3][1];

0 commit comments

Comments
 (0)