diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp
index 2fdb6b08a3..444972dac2 100644
--- a/source/adapters/cuda/command_buffer.cpp
+++ b/source/adapters/cuda/command_buffer.cpp
@@ -354,14 +354,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
   UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
   UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
 
-  CUgraphNode GraphNode;
+  try {
+    CUgraphNode GraphNode;
 
-  std::vector<CUgraphNode> DepsList;
-  UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList,
-                                        pSyncPointWaitList, DepsList));
+    std::vector<CUgraphNode> DepsList;
+    UR_CHECK_ERROR(getNodesFromSyncPoints(
+        hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList));
 
-  if (*pGlobalWorkSize == 0) {
-    try {
+    if (*pGlobalWorkSize == 0) {
       // Create an empty node if the kernel workload size is zero
       UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph,
                                          DepsList.data(), DepsList.size()));
@@ -371,25 +371,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
       if (pSyncPoint) {
         *pSyncPoint = SyncPoint;
       }
-    } catch (ur_result_t Err) {
-      return Err;
+      return UR_RESULT_SUCCESS;
     }
-    return UR_RESULT_SUCCESS;
-  }
 
-  // Set the number of threads per block to the number of threads per warp
-  // by default unless user has provided a better number
-  size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
-  size_t BlocksPerGrid[3] = {1u, 1u, 1u};
+    // Set the number of threads per block to the number of threads per warp
+    // by default unless user has provided a better number
+    size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
+    size_t BlocksPerGrid[3] = {1u, 1u, 1u};
 
-  uint32_t LocalSize = hKernel->getLocalSize();
-  CUfunction CuFunc = hKernel->get();
-  UR_CHECK_ERROR(
-      setKernelParams(hCommandBuffer->Context, hCommandBuffer->Device, workDim,
-                      pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
-                      hKernel, CuFunc, ThreadsPerBlock, BlocksPerGrid));
+    uint32_t LocalSize = hKernel->getLocalSize();
+    CUfunction CuFunc = hKernel->get();
+    UR_CHECK_ERROR(setKernelParams(
+        hCommandBuffer->Context, hCommandBuffer->Device, workDim,
+        pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, hKernel, CuFunc,
+        ThreadsPerBlock, BlocksPerGrid));
 
-  try {
     // Set node param structure with the kernel related data
     auto &ArgIndices = hKernel->getArgIndices();
     CUDA_KERNEL_NODE_PARAMS NodeParams = {};
diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp
index 4ff38626af..80064a0d80 100644
--- a/source/adapters/hip/command_buffer.cpp
+++ b/source/adapters/hip/command_buffer.cpp
@@ -324,14 +324,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
   UR_ASSERT(!(pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0),
             UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
 
-  hipGraphNode_t GraphNode;
-  std::vector<hipGraphNode_t> DepsList;
+  try {
+    hipGraphNode_t GraphNode;
+    std::vector<hipGraphNode_t> DepsList;
 
-  UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList,
-                                        pSyncPointWaitList, DepsList));
+    UR_CHECK_ERROR(getNodesFromSyncPoints(
+        hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList));
 
-  if (*pGlobalWorkSize == 0) {
-    try {
+    if (*pGlobalWorkSize == 0) {
       // Create an empty node if the kernel workload size is zero
       UR_CHECK_ERROR(hipGraphAddEmptyNode(&GraphNode, hCommandBuffer->HIPGraph,
                                           DepsList.data(), DepsList.size()));
@@ -341,24 +341,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
       if (pSyncPoint) {
         *pSyncPoint = SyncPoint;
       }
-    } catch (ur_result_t Err) {
-      return Err;
+      return UR_RESULT_SUCCESS;
     }
-    return UR_RESULT_SUCCESS;
-  }
 
-  // Set the number of threads per block to the number of threads per warp
-  // by default unless user has provided a better number
-  size_t ThreadsPerBlock[3] = {64u, 1u, 1u};
-  size_t BlocksPerGrid[3] = {1u, 1u, 1u};
+    // Set the number of threads per block to the number of threads per warp
+    // by default unless user has provided a better number
+    size_t ThreadsPerBlock[3] = {64u, 1u, 1u};
+    size_t BlocksPerGrid[3] = {1u, 1u, 1u};
 
-  uint32_t LocalSize = hKernel->getLocalSize();
-  hipFunction_t HIPFunc = hKernel->get();
-  UR_CHECK_ERROR(setKernelParams(
-      hCommandBuffer->Device, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid));
+    uint32_t LocalSize = hKernel->getLocalSize();
+    hipFunction_t HIPFunc = hKernel->get();
+    UR_CHECK_ERROR(setKernelParams(
+        hCommandBuffer->Device, workDim, pGlobalWorkOffset, pGlobalWorkSize,
+        pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid));
 
-  try {
     // Set node param structure with the kernel related data
     auto &ArgIndices = hKernel->getArgIndices();
     hipKernelNodeParams NodeParams;