Skip to content

Commit fb923ef

Browse files
author
Fábio Mestre
committed
Add hip support and create capabilities flag
1 parent 3510f12 commit fb923ef

38 files changed

+789
-287
lines changed

include/ur_api.h

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,8 +1629,8 @@ typedef enum ur_device_info_t {
16291629
///< `EnqueueDeviceGlobalVariableRead` entry points.
16301630
UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of
16311631
///< command-buffers.
1632-
UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel
1633-
///< commands in a command-buffer.
1632+
UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP = 0x1001, ///< [::ur_device_command_buffer_update_capability_flags_t] Command-buffer
1633+
///< update capabilities of the device
16341634
UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP = 0x1111, ///< [::ur_bool_t] return true if enqueue Cluster Launch is supported
16351635
UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of
16361636
///< bindless images
@@ -8155,6 +8155,27 @@ urBindlessImagesSignalExternalSemaphoreExp(
81558155
#if !defined(__GNUC__)
81568156
#pragma region command_buffer_(experimental)
81578157
#endif
8158+
///////////////////////////////////////////////////////////////////////////////
8159+
/// @brief Device command-buffer update capability flags
8160+
typedef uint32_t ur_device_command_buffer_update_capability_flags_t;
8161+
typedef enum ur_device_command_buffer_update_capability_flag_t {
8162+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS = UR_BIT(0), ///< Device supports updating the kernel arguments in command-buffer
8163+
///< commands.
8164+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE = UR_BIT(1), ///< Device supports updating the local work-group size in command-buffer
8165+
///< commands.
8166+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE = UR_BIT(2), ///< Device supports updating the global work size in command-buffer
8167+
///< commands.
8168+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET = UR_BIT(3), ///< Device supports updating the global work offset in command-buffer
8169+
///< commands.
8170+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE = UR_BIT(4), ///< Device supports updating the kernel handle in command-buffer commands.
8171+
/// @cond
8172+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_FORCE_UINT32 = 0x7fffffff
8173+
/// @endcond
8174+
8175+
} ur_device_command_buffer_update_capability_flag_t;
8176+
/// @brief Bit Mask for validating ur_device_command_buffer_update_capability_flags_t
8177+
#define UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAGS_MASK 0xffffffe0
8178+
81588179
///////////////////////////////////////////////////////////////////////////////
81598180
/// @brief Command-buffer query information type
81608181
typedef enum ur_exp_command_buffer_info_t {
@@ -8208,7 +8229,7 @@ typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t {
82088229
///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC
82098230
const void *pNext; ///< [in][optional] pointer to extension-specific structure
82108231
uint32_t argIndex; ///< [in] Argument index.
8211-
const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optinal] Pointer to memory object properties.
8232+
const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optional] Pointer to memory object properties.
82128233
ur_mem_handle_t hNewMemObjArg; ///< [in][optional] Handle of memory object to set at argument index.
82138234

82148235
} ur_exp_command_buffer_update_memobj_arg_desc_t;
@@ -8220,7 +8241,7 @@ typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t {
82208241
///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC
82218242
const void *pNext; ///< [in][optional] pointer to extension-specific structure
82228243
uint32_t argIndex; ///< [in] Argument index.
8223-
const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optinal] Pointer to USM pointer properties.
8244+
const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optional] Pointer to USM pointer properties.
82248245
const void *pNewPointerArg; ///< [in][optional] USM pointer to memory location holding the argument
82258246
///< value to set at argument index.
82268247

@@ -8234,7 +8255,7 @@ typedef struct ur_exp_command_buffer_update_value_arg_desc_t {
82348255
const void *pNext; ///< [in][optional] pointer to extension-specific structure
82358256
uint32_t argIndex; ///< [in] Argument index.
82368257
uint32_t argSize; ///< [in] Argument size.
8237-
const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optinal] Pointer to value properties.
8258+
const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optional] Pointer to value properties.
82388259
const void *pNewValueArg; ///< [in][optional] Argument value representing matching kernel arg type to
82398260
///< set at argument index.
82408261

@@ -8411,8 +8432,9 @@ urCommandBufferAppendKernelLaunchExp(
84118432
///< phKernelAlternatives.
84128433
ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels
84138434
///< handles that might be used to update the kernel in this
8414-
///< command after the command-buffer is finalized. It's invalid to specify
8415-
///< the default kernel `hKernel` as part of this list.
8435+
///< command after the command-buffer is finalized. The default kernel
8436+
///< `hKernel` is implicitly marked as an alternative. It's
8437+
///< invalid to specify it as part of this list.
84168438
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
84178439
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May
84188440
///< be ignored if command-buffer is in-order.
@@ -8928,8 +8950,9 @@ urCommandBufferReleaseCommandExp(
89288950
/// - ::UR_RESULT_ERROR_INVALID_OPERATION
89298951
/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to.
89308952
/// + If the command-buffer `hCommand` belongs to has not been finalized.
8953+
/// + If `pUpdateKernelLaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is zero.
8954+
/// + If `pUpdateKernelLaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`.
89318955
/// + If `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL.
8932-
/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`.
89338956
/// + If `pUpdateKernelLaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size.
89348957
/// + If `pUpdateKernelLaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernelLaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size.
89358958
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP

include/ur_print.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpExternalSemaphoreDesc(const struct
970970
/// - `buff_size < out_size`
971971
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, char *buffer, const size_t buff_size, size_t *out_size);
972972

973+
///////////////////////////////////////////////////////////////////////////////
974+
/// @brief Print ur_device_command_buffer_update_capability_flag_t enum
975+
/// @returns
976+
/// - ::UR_RESULT_SUCCESS
977+
/// - ::UR_RESULT_ERROR_INVALID_SIZE
978+
/// - `buff_size < out_size`
979+
UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceCommandBufferUpdateCapabilityFlags(enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);
980+
973981
///////////////////////////////////////////////////////////////////////////////
974982
/// @brief Print ur_exp_command_buffer_info_t enum
975983
/// @returns

include/ur_print.hpp

Lines changed: 108 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
197197
template <>
198198
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);
199199

200+
template <>
201+
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag);
200202
template <>
201203
inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size);
202204

@@ -335,6 +337,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
335337
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_mem_desc_t params);
336338
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_semaphore_desc_t params);
337339
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_image_copy_region_t params);
340+
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value);
338341
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_info_t value);
339342
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_command_info_t value);
340343
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params);
@@ -2541,8 +2544,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
25412544
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
25422545
os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
25432546
break;
2544-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:
2545-
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP";
2547+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP:
2548+
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP";
25462549
break;
25472550
case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP:
25482551
os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP";
@@ -4049,15 +4052,16 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
40494052

40504053
os << ")";
40514054
} break;
4052-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
4053-
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4054-
if (sizeof(ur_bool_t) > size) {
4055-
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4055+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
4056+
const ur_device_command_buffer_update_capability_flags_t *tptr = (const ur_device_command_buffer_update_capability_flags_t *)ptr;
4057+
if (sizeof(ur_device_command_buffer_update_capability_flags_t) > size) {
4058+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_command_buffer_update_capability_flags_t) << ")";
40564059
return UR_RESULT_ERROR_INVALID_SIZE;
40574060
}
40584061
os << (const void *)(tptr) << " (";
40594062

4060-
os << *tptr;
4063+
ur::details::printFlag<ur_device_command_buffer_update_capability_flag_t>(os,
4064+
*tptr);
40614065

40624066
os << ")";
40634067
} break;
@@ -9669,6 +9673,103 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_image_copy
96699673
return os;
96709674
}
96719675
///////////////////////////////////////////////////////////////////////////////
9676+
/// @brief Print operator for the ur_device_command_buffer_update_capability_flag_t type
9677+
/// @returns
9678+
/// std::ostream &
9679+
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value) {
9680+
switch (value) {
9681+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS:
9682+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS";
9683+
break;
9684+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE:
9685+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE";
9686+
break;
9687+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE:
9688+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE";
9689+
break;
9690+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET:
9691+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET";
9692+
break;
9693+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE:
9694+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE";
9695+
break;
9696+
default:
9697+
os << "unknown enumerator";
9698+
break;
9699+
}
9700+
return os;
9701+
}
9702+
9703+
namespace ur::details {
9704+
///////////////////////////////////////////////////////////////////////////////
9705+
/// @brief Print ur_device_command_buffer_update_capability_flag_t flag
9706+
template <>
9707+
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag) {
9708+
uint32_t val = flag;
9709+
bool first = true;
9710+
9711+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) {
9712+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS;
9713+
if (!first) {
9714+
os << " | ";
9715+
} else {
9716+
first = false;
9717+
}
9718+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS;
9719+
}
9720+
9721+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) {
9722+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE;
9723+
if (!first) {
9724+
os << " | ";
9725+
} else {
9726+
first = false;
9727+
}
9728+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE;
9729+
}
9730+
9731+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) {
9732+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE;
9733+
if (!first) {
9734+
os << " | ";
9735+
} else {
9736+
first = false;
9737+
}
9738+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE;
9739+
}
9740+
9741+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) {
9742+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
9743+
if (!first) {
9744+
os << " | ";
9745+
} else {
9746+
first = false;
9747+
}
9748+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
9749+
}
9750+
9751+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) {
9752+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
9753+
if (!first) {
9754+
os << " | ";
9755+
} else {
9756+
first = false;
9757+
}
9758+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
9759+
}
9760+
if (val != 0) {
9761+
std::bitset<32> bits(val);
9762+
if (!first) {
9763+
os << " | ";
9764+
}
9765+
os << "unknown bit flags " << bits;
9766+
} else if (first) {
9767+
os << "0";
9768+
}
9769+
return UR_RESULT_SUCCESS;
9770+
}
9771+
} // namespace ur::details
9772+
///////////////////////////////////////////////////////////////////////////////
96729773
/// @brief Print operator for the ur_exp_command_buffer_info_t type
96739774
/// @returns
96749775
/// std::ostream &

scripts/core/EXP-COMMAND-BUFFER.rst

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,9 @@ Updating Command-Buffer Commands
167167

168168
An adapter implementing the command-buffer experimental feature can optionally
169169
support updating the configuration of kernel commands recorded to a
170-
command-buffer. Support for this is reported by returning true in the
171-
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query.
170+
command-buffer. The attributes of kernel commands that can be updated are
171+
device specific and can be queried using the
172+
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP query.
172173

173174
Updating kernel commands is done by passing the new kernel configuration
174175
to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of
@@ -259,7 +260,13 @@ Enums
259260
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
260261
* ${x}_device_info_t
261262
* ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP
262-
* ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP
263+
* ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP
264+
* ${x}_device_command_buffer_update_capability_flags_t
265+
* KERNEL_ARGUMENTS
266+
* LOCAL_WORK_SIZE
267+
* GLOBAL_WORK_SIZE
268+
* GLOBAL_WORK_OFFSET
269+
* KERNEL_HANDLE
263270
* ${x}_result_t
264271
* ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP
265272
* ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP

0 commit comments

Comments
 (0)