Skip to content

Commit d1d1918

Browse files
committed
Rename CHECK_CUDA_INPUT macro
1 parent eaa4fea commit d1d1918

File tree

1 file changed

+29
-29
lines changed

1 file changed

+29
-29
lines changed

include/flashinfer/trtllm/fused_moe/DevKernel.h

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
33 33

34 34
namespace moe::dev {
35 35

36-
#define CHECK_CUDA(cmd) \
36+
#define CHECK_CUDA_INPUT(cmd) \
37 37
do { \
38 38
cudaError_t e = cmd; \
39 39
if (e != cudaSuccess) { \
@@ -46,34 +46,34 @@ namespace moe::dev {
46 46

47 47
#define LAUNCH_ESC(...) __VA_ARGS__
48 48

49-
#define LAUNCH_PDL(data, coopLaunch, types, kernel, numBlocks, numThreads, smemSize, stream) \
50-
cudaLaunchConfig_t config{}; \
51-
config.gridDim = numBlocks; \
52-
config.blockDim = numThreads; \
53-
config.dynamicSmemBytes = smemSize; \
54-
config.stream = (cudaStream_t)stream; \
55-
\
56-
cudaLaunchAttribute attributes[2] = {}; \
57-
attributes[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; \
58-
attributes[0].val.programmaticStreamSerializationAllowed = int(data.mUsePdl); \
59-
attributes[1].id = cudaLaunchAttributeCooperative; \
60-
attributes[1].val.cooperative = int(coopLaunch); \
61-
config.attrs = attributes; \
62-
config.numAttrs = 2; \
63-
if (data.mUsePdl) { \
64-
auto params = KernelParams<types, true>::setKernelParams(data); \
65-
auto kernelTyped = kernel<KernelParams<types, true>>; \
66-
if (smemSize > 48 * 1024) \
67-
CHECK_CUDA(cudaFuncSetAttribute(kernelTyped, cudaFuncAttributeMaxDynamicSharedMemorySize, \
68-
smemSize)); \
69-
CHECK_CUDA(cudaLaunchKernelEx(&config, kernelTyped, params)); \
70-
} else { \
71-
auto params = KernelParams<types, false>::setKernelParams(data); \
72-
auto kernelTyped = kernel<KernelParams<types, false>>; \
73-
if (smemSize > 48 * 1024) \
74-
CHECK_CUDA(cudaFuncSetAttribute(kernelTyped, cudaFuncAttributeMaxDynamicSharedMemorySize, \
75-
smemSize)); \
76-
CHECK_CUDA(cudaLaunchKernelEx(&config, kernelTyped, params)); \
49+
#define LAUNCH_PDL(data, coopLaunch, types, kernel, numBlocks, numThreads, smemSize, stream) \
50+
cudaLaunchConfig_t config{}; \
51+
config.gridDim = numBlocks; \
52+
config.blockDim = numThreads; \
53+
config.dynamicSmemBytes = smemSize; \
54+
config.stream = (cudaStream_t)stream; \
55+
\
56+
cudaLaunchAttribute attributes[2] = {}; \
57+
attributes[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; \
58+
attributes[0].val.programmaticStreamSerializationAllowed = int(data.mUsePdl); \
59+
attributes[1].id = cudaLaunchAttributeCooperative; \
60+
attributes[1].val.cooperative = int(coopLaunch); \
61+
config.attrs = attributes; \
62+
config.numAttrs = 2; \
63+
if (data.mUsePdl) { \
64+
auto params = KernelParams<types, true>::setKernelParams(data); \
65+
auto kernelTyped = kernel<KernelParams<types, true>>; \
66+
if (smemSize > 48 * 1024) \
67+
CHECK_CUDA_INPUT(cudaFuncSetAttribute( \
68+
kernelTyped, cudaFuncAttributeMaxDynamicSharedMemorySize, smemSize)); \
69+
CHECK_CUDA_INPUT(cudaLaunchKernelEx(&config, kernelTyped, params)); \
70+
} else { \
71+
auto params = KernelParams<types, false>::setKernelParams(data); \
72+
auto kernelTyped = kernel<KernelParams<types, false>>; \
73+
if (smemSize > 48 * 1024) \
74+
CHECK_CUDA_INPUT(cudaFuncSetAttribute( \
75+
kernelTyped, cudaFuncAttributeMaxDynamicSharedMemorySize, smemSize)); \
76+
CHECK_CUDA_INPUT(cudaLaunchKernelEx(&config, kernelTyped, params)); \
77 77
}
78 78

79 79
#define LAUNCH(data, kernel, numBlocks, numThreads, smemSize, stream) \

0 commit comments

Comments
 (0)