Skip to content

Commit 3d64f76

Browse files
author
Ewan Crawford
committed
CUDA fix for command-buffer local argument update
After setting kernel arguments during update, we need to reset the amount of local memory used.
1 parent 09ae26a commit 3d64f76

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

source/adapters/cuda/command_buffer.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1396,14 +1396,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
13961396

13971397
CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params;
13981398

1399+
const auto LocalSize = KernelCommandHandle->Kernel->getLocalSize();
1400+
if (LocalSize != 0) {
1401+
// Clean the local size, otherwise calling updateKernelArguments() in
1402+
// future updates with local arguments will incorrectly increase the
1403+
// size further.
1404+
KernelCommandHandle->Kernel->clearLocalSize();
1405+
}
1406+
13991407
Params.func = CuFunc;
14001408
Params.gridDimX = BlocksPerGrid[0];
14011409
Params.gridDimY = BlocksPerGrid[1];
14021410
Params.gridDimZ = BlocksPerGrid[2];
14031411
Params.blockDimX = ThreadsPerBlock[0];
14041412
Params.blockDimY = ThreadsPerBlock[1];
14051413
Params.blockDimZ = ThreadsPerBlock[2];
1406-
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
1414+
Params.sharedMemBytes = LocalSize;
14071415
Params.kernelParams =
14081416
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());
14091417

0 commit comments

Comments
 (0)