Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -9536,13 +9536,15 @@ urEnqueueCooperativeKernelLaunchExp(
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hKernel`
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == pLocalWorkSize`
/// + `NULL == pGroupCountRet`
/// - ::UR_RESULT_ERROR_INVALID_KERNEL
UR_APIEXPORT ur_result_t UR_APICALL
urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
ur_device_handle_t hDevice, ///< [in] handle of the device object
uint32_t workDim, ///< [in] number of dimensions, from 1 to 3, to specify the work-group
///< work-items
const size_t *pLocalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the
Expand Down Expand Up @@ -11083,6 +11085,7 @@ typedef struct ur_kernel_set_specialization_constants_params_t {
/// allowing the callback the ability to modify the parameter's value
typedef struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t {
ur_kernel_handle_t *phKernel;
ur_device_handle_t *phDevice;
uint32_t *pworkDim;
const size_t **ppLocalWorkSize;
size_t *pdynamicSharedMemorySize;
Expand Down
1 change: 1 addition & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,7 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetKernelProcAddrTable_t)(
/// @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp
typedef ur_result_t(UR_APICALL *ur_pfnKernelSuggestMaxCooperativeGroupCountExp_t)(
ur_kernel_handle_t,
ur_device_handle_t,
uint32_t,
const size_t *,
size_t,
Expand Down
6 changes: 6 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13187,6 +13187,12 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
ur::details::printPtr(os,
*(params->phKernel));

os << ", ";
os << ".hDevice = ";

ur::details::printPtr(os,
*(params->phDevice));

os << ", ";
os << ".workDim = ";

Expand Down
3 changes: 3 additions & 0 deletions scripts/core/exp-cooperative-kernels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ params:
- type: $x_kernel_handle_t
name: hKernel
desc: "[in] handle of the kernel object"
- type: $x_device_handle_t
name: hDevice
desc: "[in] handle of the device object"
- type: uint32_t
name: workDim
desc: "[in] number of dimensions, from 1 to 3, to specify the work-group work-items"
Expand Down
7 changes: 5 additions & 2 deletions source/adapters/cuda/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pLocalWorkSize,
size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
uint32_t *pGroupCountRet) {
UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_KERNEL);

std::ignore = hDevice;

size_t localWorkSize = pLocalWorkSize[0];
localWorkSize *= (workDim >= 2 ? pLocalWorkSize[1] : 1);
localWorkSize *= (workDim == 3 ? pLocalWorkSize[2] : 1);
Expand Down
6 changes: 4 additions & 2 deletions source/adapters/hip/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,11 @@ urKernelGetNativeHandle(ur_kernel_handle_t, ur_native_handle_t *) {
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pLocalWorkSize,
size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
uint32_t *pGroupCountRet) {
std::ignore = hKernel;
std::ignore = hDevice;
std::ignore = workDim;
std::ignore = pLocalWorkSize;
std::ignore = dynamicSharedMemorySize;
Expand Down
9 changes: 6 additions & 3 deletions source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1054,8 +1054,9 @@ ur_result_t urKernelGetNativeHandle(
}

ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pLocalWorkSize,
size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
uint32_t *pGroupCountRet) {
(void)dynamicSharedMemorySize;
std::shared_lock<ur_shared_mutex> Guard(hKernel->Mutex);

Expand All @@ -1066,8 +1067,10 @@ ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
ZE2UR_CALL(zeKernelSetGroupSize, (hKernel->ZeKernel, WG[0], WG[1], WG[2]));

uint32_t TotalGroupCount = 0;
ze_kernel_handle_t ZeKernel;
UR_CALL(getZeKernel(hDevice->ZeDevice, hKernel, &ZeKernel));
ZE2UR_CALL(zeKernelSuggestMaxCooperativeGroupCount,
(hKernel->ZeKernel, &TotalGroupCount));
(ZeKernel, &TotalGroupCount));
*pGroupCountRet = TotalGroupCount;
return UR_RESULT_SUCCESS;
}
Expand Down
5 changes: 3 additions & 2 deletions source/adapters/level_zero/ur_interface_loader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,8 +691,9 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp(
const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pLocalWorkSize,
size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet);
ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
uint32_t *pGroupCountRet);
ur_result_t urEnqueueTimestampRecordingExp(
ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
Expand Down
5 changes: 3 additions & 2 deletions source/adapters/level_zero/v2/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,8 +560,9 @@ ur_result_t urCommandBufferCommandGetInfoExp(
}

ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pLocalWorkSize,
size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
uint32_t *pGroupCountRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
Expand Down
7 changes: 6 additions & 1 deletion source/adapters/mock/ur_mockddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10057,6 +10057,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp
__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
ur_device_handle_t hDevice, ///< [in] handle of the device object
uint32_t
workDim, ///< [in] number of dimensions, from 1 to 3, to specify the work-group
///< work-items
Expand All @@ -10072,7 +10073,11 @@ __urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_result_t result = UR_RESULT_SUCCESS;

ur_kernel_suggest_max_cooperative_group_count_exp_params_t params = {
&hKernel, &workDim, &pLocalWorkSize, &dynamicSharedMemorySize,
&hKernel,
&hDevice,
&workDim,
&pLocalWorkSize,
&dynamicSharedMemorySize,
&pGroupCountRet};

auto beforeCallback = reinterpret_cast<ur_mock_callback_t>(
Expand Down
1 change: 1 addition & 0 deletions source/adapters/opencl/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(

UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
[[maybe_unused]] ur_kernel_handle_t hKernel,
[[maybe_unused]] ur_device_handle_t hDevice,
[[maybe_unused]] uint32_t workDim,
[[maybe_unused]] const size_t *pLocalWorkSize,
[[maybe_unused]] size_t dynamicSharedMemorySize,
Expand Down
9 changes: 7 additions & 2 deletions source/loader/layers/tracing/ur_trcddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8633,6 +8633,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp
__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
ur_device_handle_t hDevice, ///< [in] handle of the device object
uint32_t
workDim, ///< [in] number of dimensions, from 1 to 3, to specify the work-group
///< work-items
Expand All @@ -8654,7 +8655,11 @@ __urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
}

ur_kernel_suggest_max_cooperative_group_count_exp_params_t params = {
&hKernel, &workDim, &pLocalWorkSize, &dynamicSharedMemorySize,
&hKernel,
&hDevice,
&workDim,
&pLocalWorkSize,
&dynamicSharedMemorySize,
&pGroupCountRet};
uint64_t instance = getContext()->notify_begin(
UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP,
Expand All @@ -8664,7 +8669,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
logger.info(" ---> urKernelSuggestMaxCooperativeGroupCountExp\n");

ur_result_t result = pfnSuggestMaxCooperativeGroupCountExp(
hKernel, workDim, pLocalWorkSize, dynamicSharedMemorySize,
hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
pGroupCountRet);

getContext()->notify_end(
Expand Down
12 changes: 11 additions & 1 deletion source/loader/layers/validation/ur_valddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9656,6 +9656,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp
__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
ur_device_handle_t hDevice, ///< [in] handle of the device object
uint32_t
workDim, ///< [in] number of dimensions, from 1 to 3, to specify the work-group
///< work-items
Expand All @@ -9681,6 +9682,10 @@ __urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
}

if (NULL == hDevice) {
return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
}

if (NULL == pLocalWorkSize) {
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
}
Expand All @@ -9695,8 +9700,13 @@ __urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
getContext()->refCountContext->logInvalidReference(hKernel);
}

if (getContext()->enableLifetimeValidation &&
!getContext()->refCountContext->isReferenceValid(hDevice)) {
getContext()->refCountContext->logInvalidReference(hDevice);
}

ur_result_t result = pfnSuggestMaxCooperativeGroupCountExp(
hKernel, workDim, pLocalWorkSize, dynamicSharedMemorySize,
hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
pGroupCountRet);

return result;
Expand Down
6 changes: 5 additions & 1 deletion source/loader/ur_ldrddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8844,6 +8844,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp
__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
ur_device_handle_t hDevice, ///< [in] handle of the device object
uint32_t
workDim, ///< [in] number of dimensions, from 1 to 3, to specify the work-group
///< work-items
Expand Down Expand Up @@ -8871,9 +8872,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
// convert loader handle to platform handle
hKernel = reinterpret_cast<ur_kernel_object_t *>(hKernel)->handle;

// convert loader handle to platform handle
hDevice = reinterpret_cast<ur_device_object_t *>(hDevice)->handle;

// forward to device-platform
result = pfnSuggestMaxCooperativeGroupCountExp(
hKernel, workDim, pLocalWorkSize, dynamicSharedMemorySize,
hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
pGroupCountRet);

return result;
Expand Down
4 changes: 3 additions & 1 deletion source/loader/ur_libapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8935,12 +8935,14 @@ ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hKernel`
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == pLocalWorkSize`
/// + `NULL == pGroupCountRet`
/// - ::UR_RESULT_ERROR_INVALID_KERNEL
ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
ur_device_handle_t hDevice, ///< [in] handle of the device object
uint32_t
workDim, ///< [in] number of dimensions, from 1 to 3, to specify the work-group
///< work-items
Expand All @@ -8961,7 +8963,7 @@ ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
}

return pfnSuggestMaxCooperativeGroupCountExp(
hKernel, workDim, pLocalWorkSize, dynamicSharedMemorySize,
hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
pGroupCountRet);
} catch (...) {
return exceptionToResult(std::current_exception());
Expand Down
2 changes: 2 additions & 0 deletions source/ur_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7578,12 +7578,14 @@ ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hKernel`
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == pLocalWorkSize`
/// + `NULL == pGroupCountRet`
/// - ::UR_RESULT_ERROR_INVALID_KERNEL
ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
ur_device_handle_t hDevice, ///< [in] handle of the device object
uint32_t
workDim, ///< [in] number of dimensions, from 1 to 3, to specify the work-group
///< work-items
Expand Down
Loading