Skip to content

Commit ee44a0b

Browse files
author
Fábio Mestre
committed
Make newWorkDim non-optional and remove newLocalWorkgroup nullptr errors
1 parent 63a8782 commit ee44a0b

26 files changed

+314
-374
lines changed

include/ur_api.h

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8294,10 +8294,8 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t {
82948294
///< values that describe the number of global work-items.
82958295
size_t *pNewLocalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned
82968296
///< values that describe the number of work-items that make up a
8297-
///< work-group. If newWorkDim is non-zero and pNewLocalWorkSize is
8298-
///< nullptr, then runtime implementation will choose the work-group size.
8299-
///< If newWorkDim is zero and pNewLocalWorkSize is nullptr, then the local
8300-
///< work size is unchanged.
8297+
///< work-group. If pNewLocalWorkSize is nullptr, then the local work size
8298+
///< is unchanged.
83018299

83028300
} ur_exp_command_buffer_update_kernel_launch_desc_t;
83038301

@@ -8435,7 +8433,9 @@ urCommandBufferAppendKernelLaunchExp(
84358433
uint32_t workDim, ///< [in] Dimension of the kernel execution.
84368434
const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel.
84378435
const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel.
8438-
const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel.
8436+
const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this
8437+
///< parameter is nullptr, then a local work size will be generated by the
8438+
///< implementation.
84398439
uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in
84408440
///< phKernelAlternatives.
84418441
ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels
@@ -8958,17 +8958,15 @@ urCommandBufferReleaseCommandExp(
89588958
/// - ::UR_RESULT_ERROR_INVALID_OPERATION
89598959
/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to.
89608960
/// + If the command-buffer `hCommand` belongs to has not been finalized.
8961-
/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero.
8962-
/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`.
8963-
/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL.
8964-
/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size.
8965-
/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size.
8961+
/// + `pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL`
8962+
/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`.
89668963
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP
89678964
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT
89688965
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
89698966
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
89708967
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
89718968
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
8969+
/// + `pUpdateKernelLaunch->newWorkDim < 0 || pUpdateKernelLaunch->newWorkDim > 3`
89728970
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
89738971
/// - ::UR_RESULT_ERROR_INVALID_VALUE
89748972
/// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created.

scripts/core/exp-command-buffer.yml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ members:
226226
desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of global work-items."
227227
- type: "size_t*"
228228
name: pNewLocalWorkSize
229-
desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If newWorkDim is non-zero and pNewLocalWorkSize is nullptr, then runtime implementation will choose the work-group size. If newWorkDim is zero and pNewLocalWorkSize is nullptr, then the local work size is unchanged."
229+
desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If pNewLocalWorkSize is nullptr, then the local work size is unchanged."
230230
--- #--------------------------------------------------------------------------
231231
type: typedef
232232
desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer."
@@ -333,7 +333,7 @@ params:
333333
desc: "[in] Global work size to use when executing kernel."
334334
- type: "const size_t*"
335335
name: pLocalWorkSize
336-
desc: "[in][optional] Local work size to use when executing kernel."
336+
desc: "[in][optional] Local work size to use when executing kernel. If this parameter is nullptr, then a local work size will be generated by the implementation."
337337
- type: uint32_t
338338
name: "numKernelAlternatives"
339339
desc: "[in] The number of kernel alternatives provided in phKernelAlternatives."
@@ -954,17 +954,15 @@ returns:
954954
- $X_RESULT_ERROR_INVALID_OPERATION:
955955
- "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to."
956956
- "If the command-buffer `hCommand` belongs to has not been finalized."
957-
- "If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero."
958-
- "If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`."
959-
- "If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL."
960-
- "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size."
961-
- "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size."
957+
- "`pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL`"
958+
- "If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`."
962959
- $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP
963960
- $X_RESULT_ERROR_INVALID_MEM_OBJECT
964961
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
965962
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
966963
- $X_RESULT_ERROR_INVALID_ENUMERATION
967-
- $X_RESULT_ERROR_INVALID_WORK_DIMENSION
964+
- $X_RESULT_ERROR_INVALID_WORK_DIMENSION:
965+
- "`pUpdateKernelLaunch->newWorkDim < 0 || pUpdateKernelLaunch->newWorkDim > 3`"
968966
- $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
969967
- $X_RESULT_ERROR_INVALID_VALUE:
970968
- "If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of $xCommandBufferAppendKernelLaunchExp when this command was created."

source/adapters/cuda/command_buffer.cpp

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -887,37 +887,11 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command,
887887
return UR_RESULT_ERROR_INVALID_OPERATION;
888888
}
889889

890-
const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim;
891-
if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) {
890+
if (UpdateCommandDesc->newWorkDim != Command->WorkDim &&
891+
Command->Kernel == UpdateCommandDesc->hNewKernel) {
892892
return UR_RESULT_ERROR_INVALID_OPERATION;
893893
}
894894

895-
if (NewWorkDim) {
896-
UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
897-
UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
898-
899-
if (NewWorkDim != Command->WorkDim &&
900-
Command->Kernel == UpdateCommandDesc->hNewKernel) {
901-
return UR_RESULT_ERROR_INVALID_OPERATION;
902-
}
903-
904-
// Error If Local size and not global size
905-
if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) &&
906-
(UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) {
907-
return UR_RESULT_ERROR_INVALID_OPERATION;
908-
}
909-
910-
// Error if local size non-nullptr and created with null
911-
// or if local size nullptr and created with non-null
912-
const bool IsNewLocalSizeNull =
913-
UpdateCommandDesc->pNewLocalWorkSize == nullptr;
914-
const bool IsOriginalLocalSizeNull = Command->isNullLocalSize();
915-
916-
if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) {
917-
return UR_RESULT_ERROR_INVALID_OPERATION;
918-
}
919-
}
920-
921895
if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) {
922896
return UR_RESULT_ERROR_INVALID_VALUE;
923897
}

source/adapters/cuda/device.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1088,7 +1088,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
10881088
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
10891089

10901090
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
1091-
/*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:*/
10921091
return ReturnValue(true);
10931092
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
10941093
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities =

source/adapters/hip/command_buffer.cpp

Lines changed: 15 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) {
4848

4949
ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
5050
ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable)
51-
: Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable),
52-
HIPGraph{nullptr}, HIPGraphExec{nullptr}, RefCountInternal{1},
53-
RefCountExternal{1}, NextSyncPoint{0} {
51+
: Context(hContext), Device(hDevice),
52+
IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr},
53+
RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} {
5454
urContextRetain(hContext);
5555
urDeviceRetain(hDevice);
5656
}
@@ -330,9 +330,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
330330
UR_RESULT_ERROR_INVALID_KERNEL);
331331
UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
332332
UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
333+
333334
UR_ASSERT(!(pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0),
334335
UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
335336

337+
for (uint32_t i = 0; i < numKernelAlternatives; ++i) {
338+
UR_ASSERT(phKernelAlternatives[i] != hKernel,
339+
UR_RESULT_ERROR_INVALID_VALUE);
340+
}
341+
336342
hipGraphNode_t GraphNode;
337343
std::vector<hipGraphNode_t> DepsList;
338344

@@ -866,37 +872,11 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command,
866872
return UR_RESULT_ERROR_INVALID_OPERATION;
867873
}
868874

869-
const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim;
870-
if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) {
875+
if (UpdateCommandDesc->newWorkDim != Command->WorkDim &&
876+
Command->Kernel == UpdateCommandDesc->hNewKernel) {
871877
return UR_RESULT_ERROR_INVALID_OPERATION;
872878
}
873879

874-
if (NewWorkDim) {
875-
UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
876-
UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
877-
878-
if (NewWorkDim != Command->WorkDim &&
879-
Command->Kernel == UpdateCommandDesc->hNewKernel) {
880-
return UR_RESULT_ERROR_INVALID_OPERATION;
881-
}
882-
883-
// Error If Local size and not global size
884-
if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) &&
885-
(UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) {
886-
return UR_RESULT_ERROR_INVALID_OPERATION;
887-
}
888-
889-
// Error if local size non-nullptr and created with null
890-
// or if local size nullptr and created with non-null
891-
const bool IsNewLocalSizeNull =
892-
UpdateCommandDesc->pNewLocalWorkSize == nullptr;
893-
const bool IsOriginalLocalSizeNull = Command->isNullLocalSize();
894-
895-
if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) {
896-
return UR_RESULT_ERROR_INVALID_OPERATION;
897-
}
898-
}
899-
900880
if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) {
901881
return UR_RESULT_ERROR_INVALID_VALUE;
902882
}
@@ -907,8 +887,8 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command,
907887
/**
908888
* Updates the arguments of CommandDesc->hNewKernel
909889
* @param[in] Device The device associated with the kernel being updated.
910-
* @param[in] UpdateCommandDesc The update command description that contains the
911-
* new kernel and its arguments.
890+
* @param[in] UpdateCommandDesc The update command description that contains
891+
* the new kernel and its arguments.
912892
* @return UR_RESULT_SUCCESS or an error code on failure
913893
*/
914894
ur_result_t
@@ -1020,8 +1000,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
10201000
updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch));
10211001
UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch));
10221002

1023-
// If no worksize is provided make sure we pass nullptr to setKernelParams so
1024-
// it can guess the local work size.
1003+
// If no worksize is provided make sure we pass nullptr to setKernelParams
1004+
// so it can guess the local work size.
10251005
const bool ProvidedLocalSize = !hCommand->isNullLocalSize();
10261006
size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr;
10271007

source/adapters/hip/device.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -903,20 +903,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
903903
case UR_DEVICE_INFO_IL_VERSION:
904904
case UR_DEVICE_INFO_ASYNC_BARRIER:
905905
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
906+
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
907+
int DriverVersion = 0;
908+
UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion));
906909

907-
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
908-
/*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: */ {
909-
int DriverVersion = 0;
910-
UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion));
911-
912-
// Return supported for the UR command-buffer experimental feature on
913-
// ROCM 5.5.1 and later. This is to workaround HIP driver bug
914-
// https://github.com/ROCm/HIP/issues/2450 in older versions.
915-
//
916-
// The version is returned as (10000000 major + 1000000 minor + patch).
917-
const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1
918-
return ReturnValue(DriverVersion >= CmdBufDriverMinVersion);
919-
}
910+
// Return supported for the UR command-buffer experimental feature on
911+
// ROCM 5.5.1 and later. This is to workaround HIP driver bug
912+
// https://github.com/ROCm/HIP/issues/2450 in older versions.
913+
//
914+
// The version is returned as (10000000 major + 1000000 minor + patch).
915+
const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1
916+
return ReturnValue(DriverVersion >= CmdBufDriverMinVersion);
917+
}
920918
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
921919
int DriverVersion = 0;
922920
UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion));

source/adapters/level_zero/command_buffer.cpp

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,35 +1320,23 @@ ur_result_t validateCommandDesc(
13201320
->mutableCommandFlags;
13211321
logger::debug("Mutable features supported by device {}", SupportedFeatures);
13221322

1323-
uint32_t Dim = CommandDesc->newWorkDim;
1324-
if (Dim != 0) {
1325-
// Error if work dim changes
1326-
if (Dim != Command->WorkDim) {
1327-
return UR_RESULT_ERROR_INVALID_OPERATION;
1328-
}
1329-
1330-
// Error If Local size and not global size
1331-
if ((CommandDesc->pNewLocalWorkSize != nullptr) &&
1332-
(CommandDesc->pNewGlobalWorkSize == nullptr)) {
1333-
return UR_RESULT_ERROR_INVALID_OPERATION;
1334-
}
1335-
1336-
// Error if local size non-nullptr and created with null
1337-
// or if local size nullptr and created with non-null
1338-
const bool IsNewLocalSizeNull = CommandDesc->pNewLocalWorkSize == nullptr;
1339-
const bool IsOriginalLocalSizeNull = !Command->UserDefinedLocalSize;
1323+
// Kernel handle updates are not yet supported.
1324+
if (CommandDesc->hNewKernel != Command->Kernel) {
1325+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1326+
}
13401327

1341-
if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) {
1342-
return UR_RESULT_ERROR_INVALID_OPERATION;
1343-
}
1328+
// Error if work dim changes
1329+
if (CommandDesc->hNewKernel == Command->Kernel &&
1330+
CommandDesc->newWorkDim != Command->WorkDim) {
1331+
return UR_RESULT_ERROR_INVALID_OPERATION;
13441332
}
13451333

13461334
// Check if new global offset is provided.
13471335
size_t *NewGlobalWorkOffset = CommandDesc->pNewGlobalWorkOffset;
13481336
UR_ASSERT(!NewGlobalWorkOffset ||
13491337
(SupportedFeatures & ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET),
13501338
UR_RESULT_ERROR_UNSUPPORTED_FEATURE);
1351-
if (NewGlobalWorkOffset && Dim > 0) {
1339+
if (NewGlobalWorkOffset) {
13521340
if (!CommandBuffer->Context->getPlatform()
13531341
->ZeDriverGlobalOffsetExtensionFound) {
13541342
logger::error("No global offset extension found on this driver");
@@ -1618,8 +1606,6 @@ ur_result_t urCommandBufferUpdateKernelLaunchExp(
16181606
ur_exp_command_buffer_command_handle_t Command,
16191607
const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) {
16201608
UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
1621-
UR_ASSERT(CommandDesc->newWorkDim <= 3,
1622-
UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
16231609

16241610
// Lock command, kernel and command buffer for update.
16251611
std::scoped_lock<ur_shared_mutex, ur_shared_mutex, ur_shared_mutex> Guard(

0 commit comments

Comments
 (0)