Skip to content

Commit 68eb2ea

Browse files
committed
Merge remote-tracking branch 'origin/main' into yc/new-api-suggestgroupsize
2 parents 540d6ce + ea00936 commit 68eb2ea

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+1095
-372
lines changed

.github/scripts/get_system_info.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ function system_info {
5353
echo "**********/proc/meminfo**********"
5454
cat /proc/meminfo
5555
echo "**********build/bin/urinfo**********"
56-
$(dirname "$(readlink -f "$0")")/../../build/bin/urinfo || true
56+
$(dirname "$(readlink -f "$0")")/../../build/bin/urinfo --no-linear-ids --verbose || true
5757
echo "******OpenCL*******"
5858
# The driver version of OpenCL Graphics is the compute-runtime version
5959
clinfo || echo "OpenCL not installed"

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
6+
cmake_minimum_required(VERSION 3.20.0 FATAL_ERROR)
77
project(unified-runtime VERSION 0.9.0)
88

99
include(GNUInstallDirs)

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ for more detailed instructions on the correct setup.
8787

8888
Required packages:
8989
- C++ compiler with C++17 support
90-
- [CMake](https://cmake.org/) >= 3.14.0
90+
- [CMake](https://cmake.org/) >= 3.20.0
9191
- Python v3.6.6 or later
9292

9393
### Windows
@@ -140,6 +140,7 @@ List of options provided by CMake:
140140
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
141141
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
142142
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
143+
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
143144
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
144145
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
145146
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |

include/ur_api.h

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,8 @@ typedef enum ur_function_t {
222222
UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 220, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp
223223
UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 221, ///< Enumerator for ::urCommandBufferGetInfoExp
224224
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 222, ///< Enumerator for ::urCommandBufferCommandGetInfoExp
225-
UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE = 223, ///< Enumerator for ::urKernelGetSuggestedLocalWorkSize
225+
UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP = 223, ///< Enumerator for ::urEnqueueTimestampRecordingExp
226+
UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE = 224, ///< Enumerator for ::urKernelGetSuggestedLocalWorkSize
226227
/// @cond
227228
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
228229
/// @endcond
@@ -1642,6 +1643,7 @@ typedef enum ur_device_info_t {
16421643
///< backed 3D sampled image data.
16431644
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP = 0x2017, ///< [::ur_bool_t] returns true if the device is capable of fetching
16441645
///< non-USM backed 3D sampled image data.
1646+
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP = 0x2018, ///< [::ur_bool_t] returns true if the device supports timestamp recording
16451647
/// @cond
16461648
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
16471649
/// @endcond
@@ -1667,7 +1669,7 @@ typedef enum ur_device_info_t {
16671669
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
16681670
/// + `NULL == hDevice`
16691671
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
1670-
/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName`
1672+
/// + `::UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP < propName`
16711673
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
16721674
/// + If `propName` is not supported by the adapter.
16731675
/// - ::UR_RESULT_ERROR_INVALID_SIZE
@@ -5656,6 +5658,7 @@ typedef enum ur_command_t {
56565658
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP = 0x1000, ///< Event created by ::urCommandBufferEnqueueExp
56575659
UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP = 0x2000, ///< Event created by ::urBindlessImagesWaitExternalSemaphoreExp
56585660
UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP = 0x2001, ///< Event created by ::urBindlessImagesSignalExternalSemaphoreExp
5661+
UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, ///< Event created by ::urEnqueueTimestampRecordingExp
56595662
/// @cond
56605663
UR_COMMAND_FORCE_UINT32 = 0x7fffffff
56615664
/// @endcond
@@ -8928,6 +8931,46 @@ urKernelSuggestMaxCooperativeGroupCountExp(
89288931
uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups
89298932
);
89308933

8934+
#if !defined(__GNUC__)
8935+
#pragma endregion
8936+
#endif
8937+
// Intel 'oneAPI' Unified Runtime Experimental APIs for enqueuing timestamp recordings
8938+
#if !defined(__GNUC__)
8939+
#pragma region enqueue timestamp recording(experimental)
8940+
#endif
8941+
///////////////////////////////////////////////////////////////////////////////
8942+
/// @brief Enqueue a command for recording the device timestamp
8943+
///
8944+
/// @returns
8945+
/// - ::UR_RESULT_SUCCESS
8946+
/// - ::UR_RESULT_ERROR_UNINITIALIZED
8947+
/// - ::UR_RESULT_ERROR_DEVICE_LOST
8948+
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
8949+
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
8950+
/// + `NULL == hQueue`
8951+
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
8952+
/// + `NULL == phEvent`
8953+
/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
8954+
UR_APIEXPORT ur_result_t UR_APICALL
8955+
urEnqueueTimestampRecordingExp(
8956+
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
8957+
bool blocking, ///< [in] indicates whether the call to this function should block until
8958+
///< the device timestamp recording command has executed on the
8959+
///< device.
8960+
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
8961+
const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
8962+
///< events that must be complete before the timestamp recording command executes.
8963+
///< If nullptr, the numEventsInWaitList must be 0, indicating no wait
8964+
///< events.
8965+
ur_event_handle_t *phEvent ///< [in,out] return an event object that identifies this particular command
8966+
///< execution instance. Profiling information can be queried
8967+
///< from this event as if `hQueue` had profiling enabled. Querying
8968+
///< `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
8969+
///< reports the timestamp at the time of the call to this function.
8970+
///< Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
8971+
///< reports the timestamp recorded when the command is executed on the device.
8972+
);
8973+
89318974
#if !defined(__GNUC__)
89328975
#pragma endregion
89338976
#endif
@@ -10651,6 +10694,18 @@ typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t {
1065110694
ur_event_handle_t **pphEvent;
1065210695
} ur_enqueue_cooperative_kernel_launch_exp_params_t;
1065310696

10697+
///////////////////////////////////////////////////////////////////////////////
10698+
/// @brief Function parameters for urEnqueueTimestampRecordingExp
10699+
/// @details Each entry is a pointer to the parameter passed to the function;
10700+
/// allowing the callback the ability to modify the parameter's value
10701+
typedef struct ur_enqueue_timestamp_recording_exp_params_t {
10702+
ur_queue_handle_t *phQueue;
10703+
bool *pblocking;
10704+
uint32_t *pnumEventsInWaitList;
10705+
const ur_event_handle_t **pphEventWaitList;
10706+
ur_event_handle_t **pphEvent;
10707+
} ur_enqueue_timestamp_recording_exp_params_t;
10708+
1065410709
///////////////////////////////////////////////////////////////////////////////
1065510710
/// @brief Function parameters for urBindlessImagesUnsampledImageHandleDestroyExp
1065610711
/// @details Each entry is a pointer to the parameter passed to the function;

include/ur_ddi.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,10 +1459,20 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)(
14591459
const ur_event_handle_t *,
14601460
ur_event_handle_t *);
14611461

1462+
///////////////////////////////////////////////////////////////////////////////
1463+
/// @brief Function-pointer for urEnqueueTimestampRecordingExp
1464+
typedef ur_result_t(UR_APICALL *ur_pfnEnqueueTimestampRecordingExp_t)(
1465+
ur_queue_handle_t,
1466+
bool,
1467+
uint32_t,
1468+
const ur_event_handle_t *,
1469+
ur_event_handle_t *);
1470+
14621471
///////////////////////////////////////////////////////////////////////////////
14631472
/// @brief Table of EnqueueExp functions pointers
14641473
typedef struct ur_enqueue_exp_dditable_t {
14651474
ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp;
1475+
ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp;
14661476
} ur_enqueue_exp_dditable_t;
14671477

14681478
///////////////////////////////////////////////////////////////////////////////

include/ur_print.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,6 +1962,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru
19621962
/// - `buff_size < out_size`
19631963
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueCooperativeKernelLaunchExpParams(const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);
19641964

1965+
///////////////////////////////////////////////////////////////////////////////
1966+
/// @brief Print ur_enqueue_timestamp_recording_exp_params_t struct
1967+
/// @returns
1968+
/// - ::UR_RESULT_SUCCESS
1969+
/// - ::UR_RESULT_ERROR_INVALID_SIZE
1970+
/// - `buff_size < out_size`
1971+
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueTimestampRecordingExpParams(const struct ur_enqueue_timestamp_recording_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);
1972+
19651973
///////////////////////////////////////////////////////////////////////////////
19661974
/// @brief Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t struct
19671975
/// @returns

include/ur_print.hpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
916916
case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP:
917917
os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP";
918918
break;
919+
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP:
920+
os << "UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP";
921+
break;
919922
case UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE:
920923
os << "UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE";
921924
break;
@@ -2574,6 +2577,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
25742577
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP:
25752578
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP";
25762579
break;
2580+
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP:
2581+
os << "UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP";
2582+
break;
25772583
default:
25782584
os << "unknown enumerator";
25792585
break;
@@ -4283,6 +4289,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
42834289

42844290
os << ")";
42854291
} break;
4292+
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: {
4293+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4294+
if (sizeof(ur_bool_t) > size) {
4295+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4296+
return UR_RESULT_ERROR_INVALID_SIZE;
4297+
}
4298+
os << (const void *)(tptr) << " (";
4299+
4300+
os << *tptr;
4301+
4302+
os << ")";
4303+
} break;
42864304
default:
42874305
os << "unknown enumerator";
42884306
return UR_RESULT_ERROR_INVALID_ENUMERATION;
@@ -8791,6 +8809,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) {
87918809
case UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP:
87928810
os << "UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP";
87938811
break;
8812+
case UR_COMMAND_TIMESTAMP_RECORDING_EXP:
8813+
os << "UR_COMMAND_TIMESTAMP_RECORDING_EXP";
8814+
break;
87948815
default:
87958816
os << "unknown enumerator";
87968817
break;
@@ -14150,6 +14171,48 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1415014171
return os;
1415114172
}
1415214173

14174+
///////////////////////////////////////////////////////////////////////////////
14175+
/// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t type
14176+
/// @returns
14177+
/// std::ostream &
14178+
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_timestamp_recording_exp_params_t *params) {
14179+
14180+
os << ".hQueue = ";
14181+
14182+
ur::details::printPtr(os,
14183+
*(params->phQueue));
14184+
14185+
os << ", ";
14186+
os << ".blocking = ";
14187+
14188+
os << *(params->pblocking);
14189+
14190+
os << ", ";
14191+
os << ".numEventsInWaitList = ";
14192+
14193+
os << *(params->pnumEventsInWaitList);
14194+
14195+
os << ", ";
14196+
os << ".phEventWaitList = {";
14197+
for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) {
14198+
if (i != 0) {
14199+
os << ", ";
14200+
}
14201+
14202+
ur::details::printPtr(os,
14203+
(*(params->pphEventWaitList))[i]);
14204+
}
14205+
os << "}";
14206+
14207+
os << ", ";
14208+
os << ".phEvent = ";
14209+
14210+
ur::details::printPtr(os,
14211+
*(params->pphEvent));
14212+
14213+
return os;
14214+
}
14215+
1415314216
///////////////////////////////////////////////////////////////////////////////
1415414217
/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type
1415514218
/// @returns
@@ -17175,6 +17238,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_
1717517238
case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: {
1717617239
os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params;
1717717240
} break;
17241+
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP: {
17242+
os << (const struct ur_enqueue_timestamp_recording_exp_params_t *)params;
17243+
} break;
1717817244
case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: {
1717917245
os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *)params;
1718017246
} break;
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<%
2+
OneApi=tags['$OneApi']
3+
x=tags['$x']
4+
X=x.upper()
5+
%>
6+
7+
.. _experimental-enqueue-timestamp-recording:
8+
9+
================================================================================
10+
Enqueue Timestamp Recording
11+
================================================================================
12+
13+
.. warning::
14+
15+
Experimental features:
16+
17+
* May be replaced, updated, or removed at any time.
18+
* Do not require maintaining API/ABI stability of their own additions over
19+
time.
20+
* Do not require conformance testing of their own additions.
21+
22+
23+
Motivation
24+
--------------------------------------------------------------------------------
25+
Currently, the only way to get timestamp information is through enabling
26+
profiling on a queue and retrieving the information from events coming from
27+
commands submitted to it. However, not all systems give full control of the
28+
queue construction to the programmer wanting the profiling information. To amend
29+
this, this extension adds the ability to enqueue a timestamp recording on any
30+
queue, with or without profiling enabled. The event returned by that command can in turn be queried for
31+
the usual profiling information.
32+
33+
34+
API
35+
--------------------------------------------------------------------------------
36+
37+
Enums
38+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39+
40+
* ${x}_device_info_t
41+
* ${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP
42+
43+
* ${x}_command_t
44+
* ${X}_COMMAND_TIMESTAMP_RECORDING_EXP
45+
46+
Functions
47+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48+
* ${x}EnqueueTimestampRecordingExp
49+
50+
Changelog
51+
--------------------------------------------------------------------------------
52+
53+
+-----------+------------------------+
54+
| Revision | Changes |
55+
+===========+========================+
56+
| 1.0 | Initial Draft |
57+
+-----------+------------------------+
58+
59+
60+
Support
61+
--------------------------------------------------------------------------------
62+
63+
Adapters which support this experimental feature *must* return true for the new
64+
`${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP` device info query.
65+
66+
67+
Contributors
68+
--------------------------------------------------------------------------------
69+
70+

0 commit comments

Comments
 (0)