Skip to content

Commit cfc8c75

Browse files
authored
refactor: use __ldlu to load/store data and refactor code for moe permute kernels (#432)
1 parent 2f00a93 commit cfc8c75

File tree

4 files changed, with 134 additions and 119 deletions (+134 / -119 lines changed).

src/kernels/attention/fast_cast.cuh

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,9 @@
4 | 4 |   #include <cuda_bf16.h>
5 | 5 |   #include <cuda_fp16.h>
6 | 6 |
  | 7 | + #include <cute/numeric/numeric_types.hpp>
7 | 8 |   #include <cute/tensor.hpp>
8 | 9 |
9 |   | - #include "cute/numeric/numeric_types.hpp"
10 |   | -
11 | 10 |   namespace llm {
12 | 11 |
13 | 12 |   namespace detail {

src/kernels/moe/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ cc_library(
7 | 7 |   SRCS
8 | 8 |   topk_softmax_kernel.cu
9 | 9 |   grouped_topk_sigmoid_kernel.cu
10 |   | - permute_kernel.cu
  | 10 | + permutation_index_kernel.cu
11 | 11 |   DEPS
12 | 12 |   cutlass
13 | 13 |   glog::glog
@@ -21,7 +21,7 @@ cc_test(
21 | 21 |   SRCS
22 | 22 |   topk_softmax_kernel_test.cu
23 | 23 |   grouped_topk_sigmoid_kernel_test.cu
24 |   | - permute_kernel_test.cu
  | 24 | + permutation_kernel_test.cu
25 | 25 |   DEPS
26 | 26 |   :moe.kernels
27 | 27 |   absl::random_random

0 commit comments

Comments
 (0)