Skip to content

Commit 6414642

Browse files
author
Fábio Mestre
committed
[SPEC] Add support for command-buffer kernel updates
1 parent c9999a7 commit 6414642

24 files changed

+218
-46
lines changed

include/ur_api.h

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8247,6 +8247,9 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t {
82478247
ur_structure_type_t stype; ///< [in] type of this structure, must be
82488248
///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC
82498249
const void *pNext; ///< [in][optional] pointer to extension-specific structure
8250+
ur_kernel_handle_t hNewKernel; ///< [in] The new kernel handle. If this value is equal to the current
8251+
///< kernel handle associated
8252+
///< with the command, then only the arguments to the kernel will be updated.
82508253
uint32_t numNewMemObjArgs; ///< [in] Length of pNewMemObjArgList.
82518254
uint32_t numNewPointerArgs; ///< [in] Length of pNewPointerArgList.
82528255
uint32_t numNewValueArgs; ///< [in] Length of pNewValueArgList.
@@ -8402,6 +8405,11 @@ urCommandBufferAppendKernelLaunchExp(
84028405
const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel.
84038406
const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel.
84048407
const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
8408+
uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
8409+
///< phKernelAlternatives.
8410+
ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernel
8411+
///< handles that might be used to update the kernel in this
8412+
///< command after the command-buffer is finalized.
84058413
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
84068414
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May
84078415
///< be ignored if command-buffer is in-order.
@@ -8909,17 +8917,18 @@ urCommandBufferReleaseCommandExp(
89098917
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
89108918
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
89118919
/// + `NULL == hCommand`
8920+
/// + `NULL == pUpdateKernelLaunch->hNewKernel`
89128921
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
89138922
/// + `NULL == pUpdateKernelLaunch`
89148923
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
89158924
/// + If update functionality is not supported by the device.
89168925
/// - ::UR_RESULT_ERROR_INVALID_OPERATION
89178926
/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to.
89188927
/// + If the command-buffer `hCommand` belongs to has not been finalized.
8919-
/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`.
8920-
/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL.
8921-
/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size.
8922-
/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size.
8928+
/// + If `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL.
8929+
/// + If `pUpdateKernelLaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`.
8930+
/// + If `pUpdateKernelLaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size.
8931+
/// + If `pUpdateKernelLaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size.
89238932
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP
89248933
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT
89258934
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
@@ -11516,6 +11525,8 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t {
1151611525
const size_t **ppGlobalWorkOffset;
1151711526
const size_t **ppGlobalWorkSize;
1151811527
const size_t **ppLocalWorkSize;
11528+
uint32_t *pnumKernelAlternatives;
11529+
ur_kernel_handle_t **pphKernelAlternatives;
1151911530
uint32_t *pnumSyncPointsInWaitList;
1152011531
const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList;
1152111532
ur_exp_command_buffer_sync_point_t **ppSyncPoint;

include/ur_ddi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1932,6 +1932,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)(
19321932
const size_t *,
19331933
const size_t *,
19341934
uint32_t,
1935+
ur_kernel_handle_t *,
1936+
uint32_t,
19351937
const ur_exp_command_buffer_sync_point_t *,
19361938
ur_exp_command_buffer_sync_point_t *,
19371939
ur_exp_command_buffer_command_handle_t *);

include/ur_print.hpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9921,6 +9921,12 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu
99219921
ur::details::printStruct(os,
99229922
(params.pNext));
99239923

9924+
os << ", ";
9925+
os << ".hNewKernel = ";
9926+
9927+
ur::details::printPtr(os,
9928+
(params.hNewKernel));
9929+
99249930
os << ", ";
99259931
os << ".numNewMemObjArgs = ";
99269932

@@ -15919,6 +15925,23 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1591915925
ur::details::printPtr(os,
1592015926
*(params->ppLocalWorkSize));
1592115927

15928+
os << ", ";
15929+
os << ".numKernelAlternatives = ";
15930+
15931+
os << *(params->pnumKernelAlternatives);
15932+
15933+
os << ", ";
15934+
os << ".phKernelAlternatives = {";
15935+
for (size_t i = 0; *(params->pphKernelAlternatives) != NULL && i < *params->pnumKernelAlternatives; ++i) {
15936+
if (i != 0) {
15937+
os << ", ";
15938+
}
15939+
15940+
ur::details::printPtr(os,
15941+
(*(params->pphKernelAlternatives))[i]);
15942+
}
15943+
os << "}";
15944+
1592215945
os << ", ";
1592315946
os << ".numSyncPointsInWaitList = ";
1592415947

scripts/core/EXP-COMMAND-BUFFER.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ were obtained from.
144144
// sync-point
145145
${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim,
146146
pGlobalWorkOffset, pGlobalWorkSize,
147-
pLocalWorkSize, 1, &syncPoint,
148-
nullptr, nullptr);
147+
pLocalWorkSize, 0, nullptr, 1,
148+
&syncPoint, nullptr, nullptr);
149149
150150
Enqueueing Command-Buffers
151151
--------------------------------------------------------------------------------
@@ -191,7 +191,7 @@ parameters to the kernel and the execution ND-Range.
191191
${x}_exp_command_buffer_command_handle_t hCommand;
192192
${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim,
193193
pGlobalWorkOffset, pGlobalWorkSize,
194-
pLocalWorkSize, 0, nullptr,
194+
pLocalWorkSize, 0, nullptr, 0, nullptr,
195195
nullptr, &hCommand);
196196
197197
// Close the command-buffer before updating
@@ -220,6 +220,7 @@ parameters to the kernel and the execution ND-Range.
220220
${x}_exp_command_buffer_update_kernel_launch_desc_t update {
221221
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
222222
nullptr, // pNext
223+
hKernel, // hNewKernel
223224
2, // numNewMemObjArgs
224225
0, // numNewPointerArgs
225226
0, // numNewValueArgs

scripts/core/exp-command-buffer.yml

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,11 @@ desc: "Descriptor type for updating a kernel launch command."
170170
base: $x_base_desc_t
171171
name: $x_exp_command_buffer_update_kernel_launch_desc_t
172172
members:
173+
- type: $x_kernel_handle_t
174+
name: hNewKernel
175+
desc: |
176+
[in] The new kernel handle. If this value is equal to the current kernel handle associated
177+
with the command, then only the arguments to the kernel will be updated.
173178
- type: uint32_t
174179
name: numNewMemObjArgs
175180
desc: "[in] Length of pNewMemObjArgList."
@@ -307,6 +312,14 @@ params:
307312
- type: "const size_t*"
308313
name: pLocalWorkSize
309314
desc: "[in][optional] Local work size to use when executing kernel."
315+
- type: uint32_t
316+
name: "numKernelAlternatives"
317+
desc: "[in] The number of kernel alternatives provided in phKernelAlternatives."
318+
- type: $x_kernel_handle_t*
319+
name: "phKernelAlternatives"
320+
desc: |
321+
[in][optional][range(0, numKernelAlternatives)] List of kernel handles that might be used to update the kernel in this
322+
command after the command-buffer is finalized.
310323
- type: uint32_t
311324
name: numSyncPointsInWaitList
312325
desc: "[in] The number of sync points in the provided dependency list."
@@ -909,17 +922,16 @@ params:
909922
- type: "const $x_exp_command_buffer_update_kernel_launch_desc_t*"
910923
name: pUpdateKernelLaunch
911924
desc: "[in] Struct defining how the kernel command is to be updated."
912-
913925
returns:
914926
- $X_RESULT_ERROR_UNSUPPORTED_FEATURE:
915927
- "If update functionality is not supported by the device."
916928
- $X_RESULT_ERROR_INVALID_OPERATION:
917929
- "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to."
918930
- "If the command-buffer `hCommand` belongs to has not been finalized."
919-
- "If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`."
920-
- "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL."
921-
- "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size."
922-
- "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size."
931+
- "If `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL."
932+
- "If `pUpdateKernelLaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`."
933+
- "If `pUpdateKernelLaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size."
934+
- "If `pUpdateKernelLaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size."
923935
- $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP
924936
- $X_RESULT_ERROR_INVALID_MEM_OBJECT
925937
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX

source/adapters/cuda/command_buffer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
344344
ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
345345
uint32_t workDim, const size_t *pGlobalWorkOffset,
346346
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
347+
uint32_t /*numKernelAlternatives*/,
348+
ur_kernel_handle_t * /*phKernelAlternatives*/,
347349
uint32_t numSyncPointsInWaitList,
348350
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
349351
ur_exp_command_buffer_sync_point_t *pSyncPoint,

source/adapters/hip/command_buffer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
312312
ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
313313
uint32_t workDim, const size_t *pGlobalWorkOffset,
314314
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
315+
uint32_t /*numKernelAlternatives*/,
316+
ur_kernel_handle_t * /*phKernelAlternatives*/,
315317
uint32_t numSyncPointsInWaitList,
316318
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
317319
ur_exp_command_buffer_sync_point_t *pSyncPoint,

source/adapters/level_zero/command_buffer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
736736
ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel,
737737
uint32_t WorkDim, const size_t *GlobalWorkOffset,
738738
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
739+
uint32_t /*numKernelAlternatives*/,
740+
ur_kernel_handle_t * /*phKernelAlternatives*/,
739741
uint32_t NumSyncPointsInWaitList,
740742
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
741743
ur_exp_command_buffer_sync_point_t *RetSyncPoint,

source/adapters/mock/ur_mockddi.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8350,6 +8350,13 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
83508350
pGlobalWorkSize, ///< [in] Global work size to use when executing kernel.
83518351
const size_t *
83528352
pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
8353+
uint32_t
8354+
numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
8355+
///< phKernelAlternatives.
8356+
ur_kernel_handle_t *
8357+
phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernel
8358+
///< handles that might be used to update the kernel in this
8359+
///< command after the command-buffer is finalized.
83538360
uint32_t
83548361
numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
83558362
const ur_exp_command_buffer_sync_point_t *
@@ -8369,6 +8376,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
83698376
&pGlobalWorkOffset,
83708377
&pGlobalWorkSize,
83718378
&pLocalWorkSize,
8379+
&numKernelAlternatives,
8380+
&phKernelAlternatives,
83728381
&numSyncPointsInWaitList,
83738382
&pSyncPointWaitList,
83748383
&pSyncPoint,

source/adapters/native_cpu/command_buffer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t) {
4949
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
5050
ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t,
5151
const size_t *, const size_t *, const size_t *, uint32_t,
52-
const ur_exp_command_buffer_sync_point_t *,
52+
ur_kernel_handle_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *,
5353
ur_exp_command_buffer_sync_point_t *,
5454
ur_exp_command_buffer_command_handle_t *) {
5555
detail::ur::die("Experimental Command-buffer feature is not "

0 commit comments

Comments
 (0)