Skip to content

Commit ef48291

Browse files
author
Fábio Mestre
committed
Add binary update implementation for Cuda
1 parent 787fe9a commit ef48291

File tree

19 files changed

+754
-631
lines changed

19 files changed

+754
-631
lines changed

include/ur_api.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8391,6 +8391,9 @@ urCommandBufferFinalizeExp(
83918391
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
83928392
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
83938393
/// - ::UR_RESULT_ERROR_INVALID_VALUE
8394+
/// + `phKernelAlternatives == NULL && numKernelAlternatives > 0`
8395+
/// + `phKernelAlternatives != NULL && numKernelAlternatives == 0`
8396+
/// + `phKernelAlternatives` contains `hKernel`
83948397
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP
83958398
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP
83968399
/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`
@@ -8406,10 +8409,11 @@ urCommandBufferAppendKernelLaunchExp(
84068409
const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel.
84078410
const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
84088411
uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
8409-
///< pKernelAlternatives.
8412+
///< phKernelAlternatives.
84108413
ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernel
84118414
///< handles that might be used to update the kernel in this
8412-
///< command after the command-buffer is finalized.
8415+
///< command after the command-buffer is finalized. It's invalid to specify
8416+
///< the default kernel `hKernel` as part of this list.
84138417
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
84148418
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May
84158419
///< be ignored if command-buffer is in-order.
@@ -8937,6 +8941,7 @@ urCommandBufferReleaseCommandExp(
89378941
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
89388942
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
89398943
/// - ::UR_RESULT_ERROR_INVALID_VALUE
8944+
/// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created.
89408945
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
89418946
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
89428947
UR_APIEXPORT ur_result_t UR_APICALL

scripts/core/exp-command-buffer.yml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ params:
319319
name: "phKernelAlternatives"
320320
desc: |
321321
[in][optional][range(0, numKernelAlternatives)] List of kernel handles that might be used to update the kernel in this
322-
command after the command-buffer is finalized.
322+
command after the command-buffer is finalized. It's invalid to specify the default kernel `hKernel` as part of this list.
323323
- type: uint32_t
324324
name: numSyncPointsInWaitList
325325
desc: "[in] The number of sync points in the provided dependency list."
@@ -338,7 +338,10 @@ returns:
338338
- $X_RESULT_ERROR_INVALID_KERNEL
339339
- $X_RESULT_ERROR_INVALID_WORK_DIMENSION
340340
- $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
341-
- $X_RESULT_ERROR_INVALID_VALUE
341+
- $X_RESULT_ERROR_INVALID_VALUE:
342+
- "`phKernelAlternatives == NULL && numKernelAlternatives > 0`"
343+
- "`phKernelAlternatives != NULL && numKernelAlternatives == 0`"
344+
- "`phKernelAlternatives` contains `hKernel`"
342345
- $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP
343346
- $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP:
344347
- "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`"
@@ -939,7 +942,8 @@ returns:
939942
- $X_RESULT_ERROR_INVALID_ENUMERATION
940943
- $X_RESULT_ERROR_INVALID_WORK_DIMENSION
941944
- $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
942-
- $X_RESULT_ERROR_INVALID_VALUE
945+
- $X_RESULT_ERROR_INVALID_VALUE:
946+
- "If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of $xCommandBufferAppendKernelLaunchExp when this command was created."
943947
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
944948
- $X_RESULT_ERROR_OUT_OF_RESOURCES
945949
--- #--------------------------------------------------------------------------

source/adapters/cuda/command_buffer.cpp

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,11 @@ ur_exp_command_buffer_command_handle_t_::
7676
ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel,
7777
CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim,
7878
const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr,
79-
const size_t *LocalWorkSizePtr)
80-
: CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params),
81-
WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) {
79+
const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives,
80+
ur_kernel_handle_t *KernelAlternatives)
81+
: CommandBuffer(CommandBuffer), Kernel(Kernel), ValidKernelHandles(),
82+
Node(Node), Params(Params), WorkDim(WorkDim), RefCountInternal(1),
83+
RefCountExternal(1) {
8284
CommandBuffer->incrementInternalReferenceCount();
8385

8486
const size_t CopySize = sizeof(size_t) * WorkDim;
@@ -96,6 +98,13 @@ ur_exp_command_buffer_command_handle_t_::
9698
std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize);
9799
std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize);
98100
}
101+
102+
/* Add the default Kernel as a valid kernel handle for this command */
103+
ValidKernelHandles.insert(Kernel);
104+
if (KernelAlternatives) {
105+
ValidKernelHandles.insert(KernelAlternatives,
106+
KernelAlternatives + NumKernelAlternatives);
107+
}
99108
}
100109

101110
/// Helper function for finding the Cuda Nodes associated with the
@@ -344,8 +353,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
344353
ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
345354
uint32_t workDim, const size_t *pGlobalWorkOffset,
346355
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
347-
uint32_t /*numKernelAlternatives*/,
348-
ur_kernel_handle_t * /*phKernelAlternatives*/,
356+
uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives,
349357
uint32_t numSyncPointsInWaitList,
350358
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
351359
ur_exp_command_buffer_sync_point_t *pSyncPoint,
@@ -356,6 +364,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
356364
UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
357365
UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
358366

367+
for (uint32_t i = 0; i < numKernelAlternatives; ++i) {
368+
UR_ASSERT(phKernelAlternatives[i] != hKernel,
369+
UR_RESULT_ERROR_INVALID_VALUE);
370+
}
371+
359372
CUgraphNode GraphNode;
360373

361374
std::vector<CUgraphNode> DepsList;
@@ -420,8 +433,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
420433
}
421434

422435
auto NewCommand = new ur_exp_command_buffer_command_handle_t_{
423-
hCommandBuffer, hKernel, GraphNode, NodeParams,
424-
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize};
436+
hCommandBuffer, hKernel, GraphNode,
437+
NodeParams, workDim, pGlobalWorkOffset,
438+
pGlobalWorkSize, pLocalWorkSize, numKernelAlternatives,
439+
phKernelAlternatives};
425440

426441
NewCommand->incrementInternalReferenceCount();
427442
hCommandBuffer->CommandHandles.push_back(NewCommand);
@@ -865,10 +880,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
865880
}
866881

867882
if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) {
868-
// Error if work dim changes
869-
if (NewWorkDim != hCommand->WorkDim) {
870-
return UR_RESULT_ERROR_INVALID_OPERATION;
871-
}
872883

873884
// Error If Local size and not global size
874885
if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) &&
@@ -888,7 +899,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
888899
}
889900

890901
// Kernel corresponding to the command to update
891-
ur_kernel_handle_t Kernel = hCommand->Kernel;
902+
ur_kernel_handle_t NewKernel = pUpdateKernelLaunch->hNewKernel;
903+
904+
if (hCommand->ValidKernelHandles.count(NewKernel)) {
905+
hCommand->Kernel = NewKernel;
906+
} else {
907+
return UR_RESULT_ERROR_INVALID_VALUE;
908+
}
892909

893910
// Update pointer arguments to the kernel
894911
uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs;
@@ -901,7 +918,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
901918

902919
ur_result_t Result = UR_RESULT_SUCCESS;
903920
try {
904-
Kernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue);
921+
NewKernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue);
905922
} catch (ur_result_t Err) {
906923
Result = Err;
907924
return Result;
@@ -920,11 +937,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
920937
ur_result_t Result = UR_RESULT_SUCCESS;
921938
try {
922939
if (ArgValue == nullptr) {
923-
Kernel->setKernelArg(ArgIndex, 0, nullptr);
940+
NewKernel->setKernelArg(ArgIndex, 0, nullptr);
924941
} else {
925942
CUdeviceptr CuPtr =
926943
std::get<BufferMem>(ArgValue->Mem).getPtr(CommandBuffer->Device);
927-
Kernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr);
944+
NewKernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr);
928945
}
929946
} catch (ur_result_t Err) {
930947
Result = Err;
@@ -945,7 +962,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
945962
ur_result_t Result = UR_RESULT_SUCCESS;
946963

947964
try {
948-
Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue);
965+
NewKernel->setKernelArg(ArgIndex, ArgSize, ArgValue);
949966
} catch (ur_result_t Err) {
950967
Result = Err;
951968
return Result;
@@ -985,12 +1002,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
9851002
// by default unless user has provided a better number
9861003
size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
9871004
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
988-
CUfunction CuFunc = Kernel->get();
1005+
CUfunction CuFunc = NewKernel->get();
9891006
ur_context_handle_t Context = CommandBuffer->Context;
9901007
ur_device_handle_t Device = CommandBuffer->Device;
9911008
auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset,
992-
GlobalWorkSize, LocalWorkSize, Kernel, CuFunc,
993-
ThreadsPerBlock, BlocksPerGrid);
1009+
GlobalWorkSize, LocalWorkSize, NewKernel,
1010+
CuFunc, ThreadsPerBlock, BlocksPerGrid);
9941011
if (Result != UR_RESULT_SUCCESS) {
9951012
return Result;
9961013
}
@@ -1004,8 +1021,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
10041021
Params.blockDimX = ThreadsPerBlock[0];
10051022
Params.blockDimY = ThreadsPerBlock[1];
10061023
Params.blockDimZ = ThreadsPerBlock[2];
1007-
Params.sharedMemBytes = Kernel->getLocalSize();
1008-
Params.kernelParams = const_cast<void **>(Kernel->getArgIndices().data());
1024+
Params.sharedMemBytes = NewKernel->getLocalSize();
1025+
Params.kernelParams = const_cast<void **>(NewKernel->getArgIndices().data());
10091026

10101027
CUgraphNode Node = hCommand->Node;
10111028
CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec;

source/adapters/cuda/command_buffer.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "logger/ur_logger.hpp"
1717
#include <cuda.h>
1818
#include <memory>
19+
#include <unordered_set>
1920

2021
// Trace an internal UR call
2122
#define UR_TRACE(Call) \
@@ -44,7 +45,8 @@ struct ur_exp_command_buffer_command_handle_t_ {
4445
ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel,
4546
CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim,
4647
const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr,
47-
const size_t *LocalWorkSizePtr);
48+
const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives,
49+
ur_kernel_handle_t *KernelAlternatives);
4850

4951
void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) {
5052
const size_t CopySize = sizeof(size_t) * WorkDim;
@@ -96,7 +98,14 @@ struct ur_exp_command_buffer_command_handle_t_ {
9698
}
9799

98100
ur_exp_command_buffer_handle_t CommandBuffer;
101+
102+
/* The currently active kernel handle for this command */
99103
ur_kernel_handle_t Kernel;
104+
105+
/* Set of all the kernel handles that can be used when updating this command
106+
*/
107+
std::unordered_set<ur_kernel_handle_t> ValidKernelHandles;
108+
100109
CUgraphNode Node;
101110
CUDA_KERNEL_NODE_PARAMS Params;
102111

source/adapters/mock/ur_mockddi.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8352,11 +8352,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
83528352
pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
83538353
uint32_t
83548354
numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
8355-
///< pKernelAlternatives.
8355+
///< phKernelAlternatives.
83568356
ur_kernel_handle_t *
83578357
phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernel
83588358
///< handles that might be used to update the kernel in this
8359-
///< command after the command-buffer is finalized.
8359+
///< command after the command-buffer is finalized. It's invalid to specify
8360+
///< the default kernel `hKernel` as part of this list.
83608361
uint32_t
83618362
numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
83628363
const ur_exp_command_buffer_sync_point_t *

source/loader/layers/tracing/ur_trcddi.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6496,11 +6496,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
64966496
pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
64976497
uint32_t
64986498
numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
6499-
///< pKernelAlternatives.
6499+
///< phKernelAlternatives.
65006500
ur_kernel_handle_t *
65016501
phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernel
65026502
///< handles that might be used to update the kernel in this
6503-
///< command after the command-buffer is finalized.
6503+
///< command after the command-buffer is finalized. It's invalid to specify
6504+
///< the default kernel `hKernel` as part of this list.
65046505
uint32_t
65056506
numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
65066507
const ur_exp_command_buffer_sync_point_t *

source/loader/layers/validation/ur_valddi.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8057,11 +8057,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
80578057
pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
80588058
uint32_t
80598059
numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
8060-
///< pKernelAlternatives.
8060+
///< phKernelAlternatives.
80618061
ur_kernel_handle_t *
80628062
phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernel
80638063
///< handles that might be used to update the kernel in this
8064-
///< command after the command-buffer is finalized.
8064+
///< command after the command-buffer is finalized. It's invalid to specify
8065+
///< the default kernel `hKernel` as part of this list.
80658066
uint32_t
80668067
numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
80678068
const ur_exp_command_buffer_sync_point_t *
@@ -8096,6 +8097,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
80968097
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
80978098
}
80988099

8100+
if (phKernelAlternatives == NULL && numKernelAlternatives > 0) {
8101+
return UR_RESULT_ERROR_INVALID_VALUE;
8102+
}
8103+
8104+
if (phKernelAlternatives != NULL && numKernelAlternatives == 0) {
8105+
return UR_RESULT_ERROR_INVALID_VALUE;
8106+
}
8107+
8108+
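if (phKernelAlternatives != NULL && [&] { for (uint32_t i = 0; i < numKernelAlternatives; ++i) { if (phKernelAlternatives[i] == hKernel) { return true; } } return false; }()) {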
8109+
return UR_RESULT_ERROR_INVALID_VALUE;
8110+
}
8111+
80998112
if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) {
81008113
return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP;
81018114
}

source/loader/ur_ldrddi.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7108,11 +7108,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
71087108
pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
71097109
uint32_t
71107110
numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
7111-
///< pKernelAlternatives.
7111+
///< phKernelAlternatives.
71127112
ur_kernel_handle_t *
71137113
phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernel
71147114
///< handles that might be used to update the kernel in this
7115-
///< command after the command-buffer is finalized.
7115+
///< command after the command-buffer is finalized. It's invalid to specify
7116+
///< the default kernel `hKernel` as part of this list.
71167117
uint32_t
71177118
numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
71187119
const ur_exp_command_buffer_sync_point_t *

source/loader/ur_libapi.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7525,6 +7525,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp(
75257525
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
75267526
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
75277527
/// - ::UR_RESULT_ERROR_INVALID_VALUE
7528+
/// + `phKernelAlternatives == NULL && numKernelAlternatives > 0`
7529+
/// + `phKernelAlternatives != NULL && numKernelAlternatives == 0`
7530+
/// + `phKernelAlternatives` contains `hKernel`
75287531
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP
75297532
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP
75307533
/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`
@@ -7544,11 +7547,12 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
75447547
pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
75457548
uint32_t
75467549
numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
7547-
///< pKernelAlternatives.
7550+
///< phKernelAlternatives.
75487551
ur_kernel_handle_t *
75497552
phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernel
75507553
///< handles that might be used to update the kernel in this
7551-
///< command after the command-buffer is finalized.
7554+
///< command after the command-buffer is finalized. It's invalid to specify
7555+
///< the default kernel `hKernel` as part of this list.
75527556
uint32_t
75537557
numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
75547558
const ur_exp_command_buffer_sync_point_t *
@@ -8321,6 +8325,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp(
83218325
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
83228326
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
83238327
/// - ::UR_RESULT_ERROR_INVALID_VALUE
8328+
/// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created.
83248329
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
83258330
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
83268331
ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(

0 commit comments

Comments
 (0)