diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 2fdb6b08a3..444972dac2 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -354,14 +354,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - CUgraphNode GraphNode; + try { + CUgraphNode GraphNode; - std::vector DepsList; - UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, - pSyncPointWaitList, DepsList)); + std::vector DepsList; + UR_CHECK_ERROR(getNodesFromSyncPoints( + hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); - if (*pGlobalWorkSize == 0) { - try { + if (*pGlobalWorkSize == 0) { // Create an empty node if the kernel workload size is zero UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size())); @@ -371,25 +371,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( if (pSyncPoint) { *pSyncPoint = SyncPoint; } - } catch (ur_result_t Err) { - return Err; + return UR_RESULT_SUCCESS; } - return UR_RESULT_SUCCESS; - } - // Set the number of threads per block to the number of threads per warp - // by default unless user has provided a better number - size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; - size_t BlocksPerGrid[3] = {1u, 1u, 1u}; + // Set the number of threads per block to the number of threads per warp + // by default unless user has provided a better number + size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; + size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - uint32_t LocalSize = hKernel->getLocalSize(); - CUfunction CuFunc = hKernel->get(); - UR_CHECK_ERROR( - setKernelParams(hCommandBuffer->Context, hCommandBuffer->Device, workDim, - pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, - hKernel, CuFunc, ThreadsPerBlock, BlocksPerGrid)); + uint32_t LocalSize = hKernel->getLocalSize(); + CUfunction CuFunc = hKernel->get(); + UR_CHECK_ERROR(setKernelParams( + hCommandBuffer->Context, hCommandBuffer->Device, workDim, + pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, hKernel, CuFunc, + ThreadsPerBlock, BlocksPerGrid)); - try { // Set node param structure with the kernel related data auto &ArgIndices = hKernel->getArgIndices(); CUDA_KERNEL_NODE_PARAMS NodeParams = {}; diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 4ff38626af..80064a0d80 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -324,14 +324,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( UR_ASSERT(!(pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - hipGraphNode_t GraphNode; - std::vector DepsList; + try { + hipGraphNode_t GraphNode; + std::vector DepsList; - UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, - pSyncPointWaitList, DepsList)); + UR_CHECK_ERROR(getNodesFromSyncPoints( + hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); - if (*pGlobalWorkSize == 0) { - try { + if (*pGlobalWorkSize == 0) { // Create an empty node if the kernel workload size is zero UR_CHECK_ERROR(hipGraphAddEmptyNode(&GraphNode, hCommandBuffer->HIPGraph, DepsList.data(), DepsList.size())); @@ -341,24 +341,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( if (pSyncPoint) { *pSyncPoint = SyncPoint; } - } catch (ur_result_t Err) { - return Err; + return UR_RESULT_SUCCESS; } - return UR_RESULT_SUCCESS; - } - // Set the number of threads per block to the number of threads per warp - // by default unless user has provided a better number - size_t ThreadsPerBlock[3] = {64u, 1u, 1u}; - size_t BlocksPerGrid[3] = {1u, 1u, 1u}; + // Set the number of threads per block to the number of threads per warp + // by default unless user has provided a better number + size_t ThreadsPerBlock[3] = {64u, 1u, 1u}; + size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - uint32_t LocalSize = hKernel->getLocalSize(); - hipFunction_t HIPFunc = hKernel->get(); - UR_CHECK_ERROR(setKernelParams( - hCommandBuffer->Device, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid)); + uint32_t LocalSize = hKernel->getLocalSize(); + hipFunction_t HIPFunc = hKernel->get(); + UR_CHECK_ERROR(setKernelParams( + hCommandBuffer->Device, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid)); - try { // Set node param structure with the kernel related data auto &ArgIndices = hKernel->getArgIndices(); hipKernelNodeParams NodeParams;