Skip to content

Commit 5d322ee

Browse files
committed
Merge remote-tracking branch 'intel/origin/main' into maronas/ext_composite_device
2 parents 7564829 + 6032f6f commit 5d322ee

30 files changed

+1690
-72
lines changed

.github/docker/ubuntu-22.04.Dockerfile

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2023 Intel Corporation
1+
# Copyright (C) 2023-2024 Intel Corporation
22
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -51,20 +51,23 @@ RUN apt-get update \
5151
${BASE_DEPS} \
5252
${UR_DEPS} \
5353
${MISC_DEPS} \
54+
&& rm -rf /var/lib/apt/lists/* \
5455
&& apt-get clean all
5556

56-
RUN pip3 install ${UR_PYTHON_DEPS}
57+
# The pip packages are pinned to versions, but hadolint probably cannot parse that here
58+
# hadolint ignore=DL3013
59+
RUN pip3 install --no-cache-dir ${UR_PYTHON_DEPS}
5760

5861
# Install DPC++
59-
COPY install_dpcpp.sh install_dpcpp.sh
62+
COPY install_dpcpp.sh /opt/install_dpcpp.sh
6063
ENV DPCPP_PATH=/opt/dpcpp
61-
RUN ./install_dpcpp.sh
64+
RUN /opt/install_dpcpp.sh
6265

6366
# Install libbacktrace
64-
COPY install_libbacktrace.sh install_libbacktrace.sh
65-
RUN ./install_libbacktrace.sh
67+
COPY install_libbacktrace.sh /opt/install_libbacktrace.sh
68+
RUN /opt/install_libbacktrace.sh
6669

6770
# Add a new (non-root) 'user'
6871
ENV USER user
6972
ENV USERPASS pass
70-
RUN useradd -m $USER -g sudo -p `mkpasswd $USERPASS`
73+
RUN useradd -m "${USER}" -g sudo -p "$(mkpasswd ${USERPASS})"

.github/workflows/hadolint.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Runs linter for Docker files
2+
name: Hadolint
3+
4+
on:
5+
workflow_dispatch:
6+
push:
7+
pull_request:
8+
paths:
9+
- '.github/docker/*Dockerfile'
10+
- '.github/workflows/hadolint.yml'
11+
12+
concurrency:
13+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
14+
cancel-in-progress: true
15+
16+
permissions:
17+
contents: read
18+
19+
jobs:
20+
linux:
21+
name: Hadolint
22+
runs-on: ubuntu-latest
23+
24+
steps:
25+
- name: Clone the git repo
26+
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
27+
28+
- name: Run Hadolint
29+
uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0
30+
with:
31+
recursive: true
32+
dockerfile: ".github/docker/*Dockerfile"
33+
# ignore pinning apt packages to versions
34+
ignore: DL3008

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,7 @@ out/
8686

8787
# External content
8888
*/**/external
89+
90+
# VS clangd
91+
/.cache
92+
/compile_commands.json

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ option(UR_USE_UBSAN "enable UndefinedBehaviorSanitizer" OFF)
3434
option(UR_USE_MSAN "enable MemorySanitizer" OFF)
3535
option(UR_USE_TSAN "enable ThreadSanitizer" OFF)
3636
option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF)
37+
option(UR_ENABLE_SANITIZER "enable device sanitizer" ON)
3738
option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF)
3839
option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" ON)
3940
option(UR_BUILD_ADAPTER_L0 "Build the Level-Zero adapter" OFF)
@@ -121,6 +122,10 @@ if(UR_ENABLE_TRACING)
121122
endif()
122123
endif()
123124

125+
if(UR_ENABLE_SANITIZER)
126+
add_compile_definitions(UR_ENABLE_SANITIZER)
127+
endif()
128+
124129
if(UR_USE_ASAN)
125130
add_sanitizer_flag(address)
126131
endif()

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ List of options provided by CMake:
126126
| UR_USE_UBSAN | Enable UndefinedBehavior Sanitizer | ON/OFF | OFF |
127127
| UR_USE_MSAN | Enable MemorySanitizer (clang only) | ON/OFF | OFF |
128128
| UR_ENABLE_TRACING | Enable XPTI-based tracing layer | ON/OFF | OFF |
129+
| UR_ENABLE_SANITIZER | Enable device sanitizer layer | ON/OFF | ON |
129130
| UR_CONFORMANCE_TARGET_TRIPLES | SYCL triples to build CTS device binaries for | Comma-separated list | spir64 |
130131
| UR_BUILD_ADAPTER_L0 | Build the Level-Zero adapter | ON/OFF | OFF |
131132
| UR_BUILD_ADAPTER_OPENCL | Build the OpenCL adapter | ON/OFF | OFF |

scripts/core/INTRO.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,15 @@ Unified Runtime loader implements tracing support through the `XPTI framework <h
179179
| **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, arguments, and return value.
180180
- None
181181

182+
Sanitizers
183+
---------------------
184+
185+
The Unified Runtime loader implements the runtime part of device-side sanitizers: AddressSanitizer (`UR_LAYER_ASAN`), MemorySanitizer (`UR_LAYER_MSAN`, planned), and ThreadSanitizer (`UR_LAYER_TSAN`, planned).
186+
187+
This layer shouldn't be enabled explicitly (for example, through the environment variable `UR_ENABLE_LAYERS`); instead, it is enabled by the program's runtime (e.g. the SYCL/OpenMP runtime) when the device code is compiled with the flag `-fsanitize=address|memory|thread`.
188+
189+
Currently, AddressSanitizer only supports some of the devices on the OpenCL and Level-Zero adapters. Support could be extended to other devices and adapters if UR virtual memory APIs and shadow memory mapping in libdevice are supported for them.
190+
182191
Logging
183192
---------------------
184193

@@ -260,6 +269,8 @@ Layers currently included with the runtime are as follows:
260269
- Enables UR_LAYER_PARAMETER_VALIDATION and UR_LAYER_LEAK_CHECKING.
261270
* - UR_LAYER_TRACING
262271
- Enables the XPTI tracing layer, see Tracing_ for more detail.
272+
* - UR_LAYER_ASAN \| UR_LAYER_MSAN \| UR_LAYER_TSAN
273+
- Enables the device-side sanitizer layer, see Sanitizers_ for more detail.
263274

264275
Environment Variables
265276
---------------------
@@ -274,6 +285,10 @@ Specific environment variables can be set to control the behavior of unified run
274285

275286
Holds parameters for setting Unified Runtime null adapter logging. The syntax is described in the Logging_ section.
276287

288+
.. envvar:: UR_LOG_SANITIZER
289+
290+
Holds parameters for setting Unified Runtime sanitizer logging. The syntax is described in the Logging_ section.
291+
277292
.. envvar:: UR_LOG_VALIDATION
278293

279294
Holds parameters for setting Unified Runtime validation logging. The syntax is described in the Logging_ section.

source/adapters/level_zero/device.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include "device.hpp"
1212
#include "ur_level_zero.hpp"
13+
#include "ur_util.hpp"
1314
#include <algorithm>
1415
#include <climits>
1516
#include <optional>
@@ -302,9 +303,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
302303
return ReturnValue(uint32_t{64});
303304
}
304305
case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
305-
// if not optimized for 32-bit access, return total memory size.
306-
// otherwise, return only maximum allocatable size.
307-
if (Device->useOptimized32bitAccess() == 0) {
306+
// if the user wishes to allocate large allocations on a system that usually
307+
// does not allow that allocation size, then we return the max global mem
308+
// size as the limit.
309+
if (Device->useRelaxedAllocationLimits()) {
308310
return ReturnValue(uint64_t{calculateGlobalMemSize(Device)});
309311
} else {
310312
return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize});
@@ -1013,20 +1015,14 @@ ur_device_handle_t_::useImmediateCommandLists() {
10131015
}
10141016
}
10151017

1016-
int32_t ur_device_handle_t_::useOptimized32bitAccess() {
1017-
static const int32_t Optimize32bitAccessMode = [this] {
1018-
// If device is Intel(R) Data Center GPU Max,
1019-
// use default provided by L0 driver.
1020-
// TODO: Use IP versioning to select based on range of devices
1021-
if (this->isPVC())
1022-
return -1;
1023-
const char *UrRet = std::getenv("UR_L0_USE_OPTIMIZED_32BIT_ACCESS");
1024-
if (!UrRet)
1025-
return 0;
1026-
return std::atoi(UrRet);
1018+
// Reports whether relaxed allocation limits are enabled, based on the
// UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS environment variable.
//
// NOTE(review): this span is a rendered diff hunk. The bare numeric lines are
// diff gutter markers, and `return Optimize32bitAccessMode;` is the removed
// statement of the previous implementation, not part of this function.
bool ur_device_handle_t_::useRelaxedAllocationLimits() {
1019+
// Function-local static initialized by an immediately-invoked lambda, so the
// environment variable is read only when the static is first initialized.
static const bool EnableRelaxedAllocationLimits = [] {
1020+
// ur_getenv's result is tested for truthiness and dereferenced below;
// presumably it is empty when the variable is not set — TODO confirm.
auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS");
1021+
// No value -> false; otherwise the integer parsed from the value is
// converted to bool (non-zero -> true).
// NOTE(review): std::stoi throws std::invalid_argument / std::out_of_range
// for a non-numeric or out-of-range value — confirm that is acceptable here.
const bool RetVal = UrRet ? std::stoi(*UrRet) : 0;
1022+
return RetVal;
10271023
}();
10281024

1029-
return Optimize32bitAccessMode;
1025+
return EnableRelaxedAllocationLimits;
10301026
}
10311027

10321028
ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,

source/adapters/level_zero/device.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ struct ur_device_handle_t_ : _ur_object {
160160
// provide support for only one, like for Intel(R)
161161
// Data Center GPU Max, for which L0 driver only
162162
// supports stateless.
163-
int32_t useOptimized32bitAccess();
163+
bool useRelaxedAllocationLimits();
164164

165165
bool isSubDevice() { return RootDevice != nullptr; }
166166

source/adapters/level_zero/event.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
108108
/* IsInternal */ false));
109109
}
110110

111-
Queue->synchronize();
111+
UR_CALL(Queue->synchronize());
112112

113113
if (OutEvent) {
114114
Queue->LastCommandEvent = reinterpret_cast<ur_event_handle_t>(*OutEvent);
@@ -625,13 +625,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
625625
///< events to wait for completion
626626
) {
627627
for (uint32_t I = 0; I < NumEvents; I++) {
628-
if (EventWaitList[I]->UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) {
628+
auto e = EventWaitList[I];
629+
if (e->UrQueue && e->UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) {
629630
// Make sure to add all host-visible "proxy" event signals if needed.
630631
// This ensures that all signalling commands are submitted below and
631632
// thus proxy events can be waited without a deadlock.
632633
//
633-
ur_event_handle_t_ *Event =
634-
ur_cast<ur_event_handle_t_ *>(EventWaitList[I]);
634+
ur_event_handle_t_ *Event = ur_cast<ur_event_handle_t_ *>(e);
635635
if (!Event->hasExternalRefs())
636636
die("urEventsWait must not be called for an internal event");
637637

@@ -782,6 +782,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
782782
Context, UR_EXT_COMMAND_TYPE_USER,
783783
Properties->isNativeHandleOwned);
784784

785+
UREvent->RefCountExternal++;
786+
785787
} catch (const std::bad_alloc &) {
786788
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
787789
} catch (...) {

source/adapters/level_zero/program.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(
161161
ZeBuildOptions += pOptions;
162162
}
163163

164-
if (phDevices[0]->useOptimized32bitAccess() == 0) {
164+
if (phDevices[0]->useRelaxedAllocationLimits()) {
165165
ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required";
166166
}
167167

@@ -256,7 +256,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile(
256256
// ze-opt-greater-than-4GB-buffer-required to disable
257257
// stateful optimizations and be able to use larger than
258258
// 4GB allocations on these kernels.
259-
if (Context->Devices[0]->useOptimized32bitAccess() == 0) {
259+
if (Context->Devices[0]->useRelaxedAllocationLimits()) {
260260
Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required";
261261
}
262262
}

0 commit comments

Comments
 (0)