@@ -25,9 +25,32 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY,
2525 blockDim.x = blockX;
2626 blockDim.y = blockY;
2727 blockDim.z = blockZ;
28- cudaStream_t stream = 0 ;
28+ cudaStream_t stream = 0 ; // TODO stream managment
2929 CUDA_REPORT_IF_ERROR (
3030 cudaLaunchKernel (kernel, gridDim, blockDim, params, smem, stream));
3131}
3232
33+ void RTDEF (CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
34+ intptr_t clusterY, intptr_t clusterZ, intptr_t gridX, intptr_t gridY,
35+ intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ,
36+ int32_t smem, void **params, void **extra) {
37+ cudaLaunchConfig_t config;
38+ config.gridDim .x = gridX;
39+ config.gridDim .y = gridY;
40+ config.gridDim .z = gridZ;
41+ config.blockDim .x = blockX;
42+ config.blockDim .y = blockY;
43+ config.blockDim .z = blockZ;
44+ config.dynamicSmemBytes = smem;
45+ config.stream = 0 ; // TODO stream managment
46+ cudaLaunchAttribute launchAttr[1 ];
47+ launchAttr[0 ].id = cudaLaunchAttributeClusterDimension;
48+ launchAttr[0 ].val .clusterDim .x = clusterX;
49+ launchAttr[0 ].val .clusterDim .y = clusterY;
50+ launchAttr[0 ].val .clusterDim .z = clusterZ;
51+ config.numAttrs = 1 ;
52+ config.attrs = launchAttr;
53+ CUDA_REPORT_IF_ERROR (cudaLaunchKernelExC (&config, kernel, params));
54+ }
55+
3356} // extern "C"
0 commit comments