@@ -369,42 +369,38 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
369369 UR_RESULT_ERROR_INVALID_VALUE);
370370 }
371371
372- CUgraphNode GraphNode;
372+ try {
373+ CUgraphNode GraphNode;
373374
374- std::vector<CUgraphNode> DepsList;
375- UR_CHECK_ERROR (getNodesFromSyncPoints (hCommandBuffer, numSyncPointsInWaitList,
376- pSyncPointWaitList, DepsList));
375+ std::vector<CUgraphNode> DepsList;
376+ UR_CHECK_ERROR (getNodesFromSyncPoints (hCommandBuffer, numSyncPointsInWaitList,
377+ pSyncPointWaitList, DepsList));
377378
378- if (*pGlobalWorkSize == 0 ) {
379- try {
379+ if (*pGlobalWorkSize == 0 ) {
380380 // Create an empty node if the kernel workload size is zero
381381 UR_CHECK_ERROR (cuGraphAddEmptyNode (&GraphNode, hCommandBuffer->CudaGraph ,
382- DepsList.data (), DepsList.size ()));
382+ DepsList.data (), DepsList.size ()));
383383
384384 // Get sync point and register the cuNode with it.
385385 auto SyncPoint = hCommandBuffer->addSyncPoint (GraphNode);
386386 if (pSyncPoint) {
387387 *pSyncPoint = SyncPoint;
388388 }
389- } catch (ur_result_t Err) {
390- return Err;
389+ return UR_RESULT_SUCCESS;
391390 }
392- return UR_RESULT_SUCCESS;
393- }
394391
395- // Set the number of threads per block to the number of threads per warp
396- // by default unless user has provided a better number
397- size_t ThreadsPerBlock[3 ] = {32u , 1u , 1u };
398- size_t BlocksPerGrid[3 ] = {1u , 1u , 1u };
392+ // Set the number of threads per block to the number of threads per warp
393+ // by default unless user has provided a better number
394+ size_t ThreadsPerBlock[3 ] = {32u , 1u , 1u };
395+ size_t BlocksPerGrid[3 ] = {1u , 1u , 1u };
399396
400- uint32_t LocalSize = hKernel->getLocalSize ();
401- CUfunction CuFunc = hKernel->get ();
402- UR_CHECK_ERROR (
403- setKernelParams (hCommandBuffer->Context , hCommandBuffer->Device , workDim,
404- pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
405- hKernel, CuFunc, ThreadsPerBlock, BlocksPerGrid));
397+ uint32_t LocalSize = hKernel->getLocalSize ();
398+ CUfunction CuFunc = hKernel->get ();
399+ UR_CHECK_ERROR (
400+ setKernelParams (hCommandBuffer->Context , hCommandBuffer->Device , workDim,
401+ pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
402+ hKernel, CuFunc, ThreadsPerBlock, BlocksPerGrid));
406403
407- try {
408404 // Set node param structure with the kernel related data
409405 auto &ArgIndices = hKernel->getArgIndices ();
410406 CUDA_KERNEL_NODE_PARAMS NodeParams = {};
0 commit comments