Skip to content

Commit 308f1dd

Browse files
committed
Merge remote-tracking branch 'intel/main' into steffen/record_event
2 parents c804856 + fcd3693 commit 308f1dd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+1386
-450
lines changed

.github/CODEOWNERS

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,21 @@
22

33
# Level Zero adapter
44
source/adapters/level_zero @oneapi-src/unified-runtime-level-zero-write
5+
test/adapters/level_zero @oneapi-src/unified-runtime-level-zero-write
56

67
# CUDA and HIP adapters
78
source/adapters/cuda @oneapi-src/unified-runtime-cuda-write
9+
test/adapters/cuda @oneapi-src/unified-runtime-cuda-write
810
source/adapters/hip @oneapi-src/unified-runtime-hip-write
11+
test/adapters/hip @oneapi-src/unified-runtime-hip-write
912

1013
# OpenCL adapter
1114
source/adapters/opencl @oneapi-src/unified-runtime-opencl-write
15+
test/adapters/opencl @oneapi-src/unified-runtime-opencl-write
1216

1317
# Native CPU adapter
1418
source/adapters/native_cpu @oneapi-src/unified-runtime-native-cpu-write
19+
test/adapters/native_cpu @oneapi-src/unified-runtime-native-cpu-write
1520

1621
# Command-buffer experimental feature
1722
source/adapters/**/command_buffer.* @oneapi-src/unified-runtime-command-buffer-write
@@ -20,6 +25,7 @@ scripts/core/exp-command-buffer.yml @oneapi-src/unified-runtime-command-buff
2025
test/conformance/exp_command_buffer** @oneapi-src/unified-runtime-command-buffer-write
2126

2227
# Bindless Images experimental feature
23-
scripts/core/EXP-BINDLESS-IMAGES.rst @oneapi-src/unified-runtime-bindless-images-write
24-
scripts/core/exp-bindless-images.yml @oneapi-src/unified-runtime-bindless-images-write
25-
source/adapters/**/image.* @oneapi-src/unified-runtime-bindless-images-write
28+
source/adapters/**/image.* @oneapi-src/unified-runtime-bindless-images-write
29+
scripts/core/EXP-BINDLESS-IMAGES.rst @oneapi-src/unified-runtime-bindless-images-write
30+
scripts/core/exp-bindless-images.yml @oneapi-src/unified-runtime-bindless-images-write
31+
test/conformance/exp_bindless_images** @oneapi-src/unified-runtime-bindless-images-write

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,13 @@ option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
4848
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
4949
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
5050
set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable")
51+
set(UR_DPCXX_BUILD_FLAGS "" CACHE STRING "Build flags to pass to DPC++ when compiling device programs")
5152
set(UR_SYCL_LIBRARY_DIR "" CACHE PATH
5253
"Path of the SYCL runtime library directory")
5354
set(UR_CONFORMANCE_TARGET_TRIPLES "" CACHE STRING
5455
"List of sycl targets to build CTS device binaries for")
5556
set(UR_CONFORMANCE_AMD_ARCH "" CACHE STRING "AMD device target ID to build CTS binaries for")
57+
option(UR_CONFORMANCE_ENABLE_MATCH_FILES "Enable CTS match files" ON)
5658
set(UR_ADAPTER_LEVEL_ZERO_SOURCE_DIR "" CACHE PATH
5759
"Path to external 'level_zero' adapter source dir")
5860
set(UR_ADAPTER_OPENCL_SOURCE_DIR "" CACHE PATH

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
## Table of contents
1414

1515
- [Unified Runtime](#unified-runtime)
16-
- [Adapters](#adapters)
1716
- [Table of contents](#table-of-contents)
1817
- [Contents of the repo](#contents-of-the-repo)
1918
- [Integration](#integration)
@@ -29,7 +28,7 @@
2928
- [Adapter naming convention](#adapter-naming-convention)
3029
- [Source code generation](#source-code-generation)
3130
- [Documentation](#documentation)
32-
6. [Release Process](#release-process)
31+
- [Release Process](#release-process)
3332

3433
## Contents of the repo
3534

@@ -131,6 +130,7 @@ List of options provided by CMake:
131130
| UR_ENABLE_SANITIZER | Enable device sanitizer layer | ON/OFF | ON |
132131
| UR_CONFORMANCE_TARGET_TRIPLES | SYCL triples to build CTS device binaries for | Comma-separated list | spir64 |
133132
| UR_CONFORMANCE_AMD_ARCH | AMD device target ID to build CTS binaries for | string | `""` |
133+
| UR_CONFORMANCE_ENABLE_MATCH_FILES | Enable CTS match files | ON/OFF | ON |
134134
| UR_BUILD_ADAPTER_L0 | Build the Level-Zero adapter | ON/OFF | OFF |
135135
| UR_BUILD_ADAPTER_OPENCL | Build the OpenCL adapter | ON/OFF | OFF |
136136
| UR_BUILD_ADAPTER_CUDA | Build the CUDA adapter | ON/OFF | OFF |
@@ -140,6 +140,7 @@ List of options provided by CMake:
140140
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
141141
| UR_ENABLE_COMGR | Enable comgr lib usage | ON/OFF | OFF |
142142
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
143+
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
143144
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
144145
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |
145146
| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` |

scripts/ctest_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def get_cts_test_suite_names(working_directory):
2424
]
2525

2626
def percent(amount, total):
27-
return round((amount / total) * 100, 2)
27+
return round((amount / (total or 1)) * 100, 2)
2828

2929
def summarize_results(results):
3030
total = results['Total']

source/adapters/cuda/device.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -930,10 +930,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
930930
// CUDA supports recording timestamp events.
931931
return ReturnValue(true);
932932
}
933-
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: {
934-
// CUDA supports recording timestamp events.
935-
return ReturnValue(true);
936-
}
937933
case UR_DEVICE_INFO_DEVICE_ID: {
938934
int Value = 0;
939935
UR_CHECK_ERROR(cuDeviceGetAttribute(

source/adapters/cuda/usm.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
3333
UR_RESULT_ERROR_INVALID_VALUE);
3434

3535
if (!hPool) {
36-
return USMHostAllocImpl(ppMem, hContext, nullptr, size, alignment);
36+
return USMHostAllocImpl(ppMem, hContext, /* flags */ 0, size, alignment);
3737
}
3838

3939
auto UMFPool = hPool->HostMemPool.get();
@@ -57,7 +57,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
5757
UR_RESULT_ERROR_INVALID_VALUE);
5858

5959
if (!hPool) {
60-
return USMDeviceAllocImpl(ppMem, hContext, hDevice, nullptr, size,
60+
return USMDeviceAllocImpl(ppMem, hContext, hDevice, /* flags */ 0, size,
6161
alignment);
6262
}
6363

@@ -82,8 +82,8 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
8282
UR_RESULT_ERROR_INVALID_VALUE);
8383

8484
if (!hPool) {
85-
return USMSharedAllocImpl(ppMem, hContext, hDevice, nullptr, nullptr, size,
86-
alignment);
85+
return USMSharedAllocImpl(ppMem, hContext, hDevice, /*host flags*/ 0,
86+
/*device flags*/ 0, size, alignment);
8787
}
8888

8989
auto UMFPool = hPool->SharedMemPool.get();
@@ -132,7 +132,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext,
132132
}
133133

134134
ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context,
135-
ur_device_handle_t, ur_usm_device_mem_flags_t *,
135+
ur_device_handle_t, ur_usm_device_mem_flags_t,
136136
size_t Size, uint32_t Alignment) {
137137
try {
138138
ScopedContext Active(Context);
@@ -151,8 +151,8 @@ ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context,
151151
}
152152

153153
ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context,
154-
ur_device_handle_t, ur_usm_host_mem_flags_t *,
155-
ur_usm_device_mem_flags_t *, size_t Size,
154+
ur_device_handle_t, ur_usm_host_mem_flags_t,
155+
ur_usm_device_mem_flags_t, size_t Size,
156156
uint32_t Alignment) {
157157
try {
158158
ScopedContext Active(Context);
@@ -172,7 +172,7 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context,
172172
}
173173

174174
ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context,
175-
ur_usm_host_mem_flags_t *, size_t Size,
175+
ur_usm_host_mem_flags_t, size_t Size,
176176
uint32_t Alignment) {
177177
try {
178178
ScopedContext Active(Context);
@@ -358,19 +358,19 @@ umf_result_t USMMemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) {
358358

359359
ur_result_t USMSharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
360360
uint32_t Alignment) {
361-
return USMSharedAllocImpl(ResultPtr, Context, Device, nullptr, nullptr, Size,
362-
Alignment);
361+
return USMSharedAllocImpl(ResultPtr, Context, Device, /*host flags*/ 0,
362+
/*device flags*/ 0, Size, Alignment);
363363
}
364364

365365
ur_result_t USMDeviceMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
366366
uint32_t Alignment) {
367-
return USMDeviceAllocImpl(ResultPtr, Context, Device, nullptr, Size,
367+
return USMDeviceAllocImpl(ResultPtr, Context, Device, /* flags */ 0, Size,
368368
Alignment);
369369
}
370370

371371
ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
372372
uint32_t Alignment) {
373-
return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment);
373+
return USMHostAllocImpl(ResultPtr, Context, /* flags */ 0, Size, Alignment);
374374
}
375375

376376
ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,

source/adapters/cuda/usm.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,15 +118,15 @@ class USMHostMemoryProvider final : public USMMemoryProvider {
118118

119119
ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context,
120120
ur_device_handle_t Device,
121-
ur_usm_device_mem_flags_t *Flags, size_t Size,
121+
ur_usm_device_mem_flags_t Flags, size_t Size,
122122
uint32_t Alignment);
123123

124124
ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context,
125125
ur_device_handle_t Device,
126-
ur_usm_host_mem_flags_t *,
127-
ur_usm_device_mem_flags_t *, size_t Size,
126+
ur_usm_host_mem_flags_t,
127+
ur_usm_device_mem_flags_t, size_t Size,
128128
uint32_t Alignment);
129129

130130
ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context,
131-
ur_usm_host_mem_flags_t *Flags, size_t Size,
131+
ur_usm_host_mem_flags_t Flags, size_t Size,
132132
uint32_t Alignment);

source/adapters/hip/usm.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
2727
UR_RESULT_ERROR_INVALID_VALUE);
2828

2929
if (!hPool) {
30-
return USMHostAllocImpl(ppMem, hContext, nullptr, size, alignment);
30+
return USMHostAllocImpl(ppMem, hContext, /* flags */ 0, size, alignment);
3131
}
3232

3333
return umfPoolMallocHelper(hPool, ppMem, size, alignment);
@@ -43,7 +43,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
4343
UR_RESULT_ERROR_INVALID_VALUE);
4444

4545
if (!hPool) {
46-
return USMDeviceAllocImpl(ppMem, hContext, hDevice, nullptr, size,
46+
return USMDeviceAllocImpl(ppMem, hContext, hDevice, /* flags */ 0, size,
4747
alignment);
4848
}
4949

@@ -60,8 +60,8 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
6060
UR_RESULT_ERROR_INVALID_VALUE);
6161

6262
if (!hPool) {
63-
return USMSharedAllocImpl(ppMem, hContext, hDevice, nullptr, nullptr, size,
64-
alignment);
63+
return USMSharedAllocImpl(ppMem, hContext, hDevice, /*host flags*/ 0,
64+
/*device flags*/ 0, size, alignment);
6565
}
6666

6767
return umfPoolMallocHelper(hPool, ppMem, size, alignment);
@@ -105,7 +105,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext,
105105

106106
ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t,
107107
ur_device_handle_t Device,
108-
ur_usm_device_mem_flags_t *, size_t Size,
108+
ur_usm_device_mem_flags_t, size_t Size,
109109
[[maybe_unused]] uint32_t Alignment) {
110110
try {
111111
ScopedContext Active(Device);
@@ -120,8 +120,8 @@ ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t,
120120

121121
ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t,
122122
ur_device_handle_t Device,
123-
ur_usm_host_mem_flags_t *,
124-
ur_usm_device_mem_flags_t *, size_t Size,
123+
ur_usm_host_mem_flags_t,
124+
ur_usm_device_mem_flags_t, size_t Size,
125125
[[maybe_unused]] uint32_t Alignment) {
126126
try {
127127
ScopedContext Active(Device);
@@ -136,7 +136,7 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t,
136136

137137
ur_result_t USMHostAllocImpl(void **ResultPtr,
138138
[[maybe_unused]] ur_context_handle_t Context,
139-
ur_usm_host_mem_flags_t *, size_t Size,
139+
ur_usm_host_mem_flags_t, size_t Size,
140140
[[maybe_unused]] uint32_t Alignment) {
141141
try {
142142
UR_CHECK_ERROR(hipHostMalloc(ResultPtr, Size));
@@ -309,19 +309,19 @@ umf_result_t USMMemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) {
309309

310310
ur_result_t USMSharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
311311
uint32_t Alignment) {
312-
return USMSharedAllocImpl(ResultPtr, Context, Device, nullptr, nullptr, Size,
313-
Alignment);
312+
return USMSharedAllocImpl(ResultPtr, Context, Device, /*host flags*/ 0,
313+
/*device flags*/ 0, Size, Alignment);
314314
}
315315

316316
ur_result_t USMDeviceMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
317317
uint32_t Alignment) {
318-
return USMDeviceAllocImpl(ResultPtr, Context, Device, nullptr, Size,
318+
return USMDeviceAllocImpl(ResultPtr, Context, Device, /* flags */ 0, Size,
319319
Alignment);
320320
}
321321

322322
ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
323323
uint32_t Alignment) {
324-
return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment);
324+
return USMHostAllocImpl(ResultPtr, Context, /* flags */ 0, Size, Alignment);
325325
}
326326

327327
ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,

source/adapters/hip/usm.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,17 +118,17 @@ class USMHostMemoryProvider final : public USMMemoryProvider {
118118

119119
ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context,
120120
ur_device_handle_t Device,
121-
ur_usm_device_mem_flags_t *Flags, size_t Size,
121+
ur_usm_device_mem_flags_t Flags, size_t Size,
122122
uint32_t Alignment);
123123

124124
ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context,
125125
ur_device_handle_t Device,
126-
ur_usm_host_mem_flags_t *,
127-
ur_usm_device_mem_flags_t *, size_t Size,
126+
ur_usm_host_mem_flags_t,
127+
ur_usm_device_mem_flags_t, size_t Size,
128128
uint32_t Alignment);
129129

130130
ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context,
131-
ur_usm_host_mem_flags_t *Flags, size_t Size,
131+
ur_usm_host_mem_flags_t Flags, size_t Size,
132132
uint32_t Alignment);
133133

134134
bool checkUSMAlignment(uint32_t &alignment, const ur_usm_desc_t *pUSMDesc);

source/adapters/level_zero/command_buffer.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,8 +1048,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
10481048
// Create a command-list that executes before `CommandListPtr` and signals
10491049
// when the `EventWaitList` dependencies are complete.
10501050
ur_command_list_ptr_t WaitCommandList{};
1051-
UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList,
1052-
false, false));
1051+
UR_CALL(Queue->Context->getAvailableCommandList(
1052+
Queue, WaitCommandList, false, NumEventsInWaitList, EventWaitList,
1053+
false));
10531054

10541055
ZE2UR_CALL(zeCommandListAppendBarrier,
10551056
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
@@ -1086,7 +1087,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
10861087
// Create a command-list to signal RetEvent on completion
10871088
ur_command_list_ptr_t SignalCommandList{};
10881089
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
1089-
false, false));
1090+
false, NumEventsInWaitList,
1091+
EventWaitList, false));
10901092
// Reset the wait-event for the UR command-buffer that is signaled when its
10911093
// submission dependencies have been satisfied.
10921094
ZE2UR_CALL(zeCommandListAppendEventReset,

0 commit comments

Comments
 (0)