Skip to content

Commit 6664de1

Browse files
authored
Merge branch 'sycl' into root_group_query
2 parents 27be1dd + 74cda4b commit 6664de1

File tree

60 files changed

+325
-82
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+325
-82
lines changed

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6969,6 +6969,7 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) {
69696969
Policy.adjustForCPlusPlusFwdDecl();
69706970
Policy.SuppressTypedefs = true;
69716971
Policy.SuppressUnwrittenScope = true;
6972+
Policy.PrintCanonicalTypes = true;
69726973

69736974
llvm::SmallSet<const VarDecl *, 8> Visited;
69746975
bool EmittedFirstSpecConstant = false;
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -fsycl-int-footer=%t.footer.h -emit-llvm %s -o -
2+
// RUN: FileCheck -input-file=%t.footer.h %s --check-prefix=CHECK-FOOTER
3+
4+
// This test checks that integration footer is emitted correctly when a
5+
// device_global has an explicit template specialization in template arguments.
6+
7+
#include "sycl.hpp"
8+
9+
namespace sycl {
10+
template <typename T> struct X {};
11+
template <> struct X<int> {};
12+
namespace detail {
13+
struct Y {};
14+
} // namespace detail
15+
template <> struct X<detail::Y> {};
16+
} // namespace sycl
17+
18+
using namespace sycl;
19+
template <typename T, typename = X<detail::Y>> struct Arg1 { T val; };
20+
21+
using namespace sycl::ext::oneapi;
22+
template <typename properties_t>
23+
device_global<properties_t> dev_global;
24+
25+
SYCL_EXTERNAL auto foo() {
26+
(void)dev_global<Arg1<int>>;
27+
}
28+
29+
// CHECK-FOOTER: __sycl_device_global_registration::__sycl_device_global_registration() noexcept {
30+
// CHECK-FOOTER-NEXT: device_global_map::add((void *)&::dev_global<Arg1<int, sycl::X<sycl::detail::Y>>>, "_Z10dev_globalI4Arg1IiN4sycl1XINS1_6detail1YEEEEE");
31+
// CHECK-FOOTER-NEXT: }
32+
// CHECK-FOOTER-NEXT: } // namespace (unnamed)
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# commit 2f479e01f62ed96a9466d19d9df1aabeda77b335
2-
# Merge: b0f9293d b693389e
1+
# commit 38ee6ce2a0400573c0c7c5da782bc32ff578fcc4
2+
# Merge: c7086f7f 91b6db04
33
# Author: Callum Fare <[email protected]>
4-
# Date: Fri Nov 22 15:27:05 2024 +0000
5-
# Merge pull request #2370 from againull/multi_device_sanitizer
6-
# [L0] Check that program is in exe state in urProgramGetGlobalVariablePointer
7-
set(UNIFIED_RUNTIME_TAG f99adf104ef5ce94d66b6ed4f9dccc450f1f638c)
4+
# Date: Mon Nov 25 11:04:37 2024 +0000
5+
# Merge pull request #2303 from nrspruit/zeInitDrivers
6+
# [L0] Implement Support for zeInitDrivers
7+
set(UNIFIED_RUNTIME_TAG 39b72622cf2997459788000ac665e259f728a9ff)

sycl/doc/design/CommandGraph.md

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,62 @@ Backends which are implemented currently are: [Level Zero](#level-zero),
337337

338338
### Level Zero
339339

340+
The command-buffer implementation for the level-zero adapter has 2 different
341+
implementation paths which are chosen depending on the device and level-zero
342+
version:
343+
344+
- Immediate Append path - Relies on
345+
[zeCommandListImmediateAppendCommandListsExp](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zecommandlistimmediateappendcommandlistsexp)
346+
to submit the command-buffer. This function is an experimental extension to the level-zero API.
347+
- Wait event path - Relies on
348+
[zeCommandQueueExecuteCommandLists](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zecommandqueueexecutecommandlists)
349+
to submit the command-buffer work. However, this level-zero function has
350+
limitations and, as such, this path is used only when the immediate append
351+
path is unavailable.
352+
353+
#### Immediate Append Path Implementation Details
354+
355+
This path is only available when the device supports immediate command-lists
356+
and the [zeCommandListImmediateAppendCommandListsExp](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zecommandlistimmediateappendcommandlistsexp)
357+
API. This API can wait on a list of event dependencies using the `phWaitEvents`
358+
parameter and can signal a return event when finished using the `hSignalEvent`
359+
parameter. This allows for a cleaner and more efficient implementation than
360+
what can be achieved when using the wait-event path
361+
(see [this section](#wait-event-path-implementation-details) for
362+
more details about the wait-event path).
363+
364+
This path relies on 3 different command-lists in order to execute the
365+
command-buffer:
366+
367+
- `ComputeCommandList` - Used to submit command-buffer work that requires
368+
the compute engine.
369+
- `CopyCommandList` - Used to submit command-buffer work that requires the
370+
[copy engine](#copy-engine). This command-list is not created when none of the
371+
nodes require the copy engine.
372+
- `EventResetCommandList` - Used to reset the level-zero events that are
373+
needed for every submission of the command-buffer. This is executed after
374+
the compute and copy command-lists have finished executing. For the first
375+
execution, this command-list is skipped since there is no need to reset events
376+
at this point. When counter-based events are enabled (i.e. the command-buffer
377+
is in-order), this command-list is not created since counter-based events do
378+
not need to be reset.
379+
380+
The following diagram illustrates which commands are executed on
381+
each command-list when the command-buffer is enqueued:
382+
![L0 command-buffer diagram](images/diagram_immediate_append.png)
383+
384+
Additionally,
385+
[zeCommandListImmediateAppendCommandListsExp](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zecommandlistimmediateappendcommandlistsexp)
386+
requires an extra command-list which is used to submit the other
387+
command-lists. This command-list has a specific engine type
388+
associated to it (i.e. compute or copy engine). Hence, for our implementation,
389+
we need 2 of these helper command-lists:
390+
- The `CommandListHelper` command-list is used to submit the
391+
`ComputeCommandList`, `CommandListResetEvents` and profiling queries.
392+
- The `ZeCopyEngineImmediateListHelper` command-list is used to submit the
393+
`CopyCommandList`
394+
395+
#### Wait event Path Implementation Details
340396
The UR `urCommandBufferEnqueueExp` interface for submitting a command-buffer
341397
takes a list of events to wait on, and returns an event representing the
342398
completion of that specific submission of the command-buffer.
@@ -364,7 +420,7 @@ is made only once (during the command-buffer finalization stage). This allows
364420
the adapter to save time when submitting the command-buffer, by executing only
365421
this command-list (i.e. without enqueuing any commands of the graph workload).
366422

367-
#### Prefix
423+
##### Prefix
368424

369425
The prefix's commands aim to:
370426
1. Handle the list of events to wait on, which is passed by the runtime
@@ -409,7 +465,7 @@ and another reset command for resetting the signal we use to signal the
409465
completion of the graph workload. This signal is called *SignalEvent* and is
410466
defined in the `ur_exp_command_buffer_handle_t` class.
411467

412-
#### Suffix
468+
##### Suffix
413469

414470
The suffix's commands aim to:
415471
1) Handle the completion of the graph workload and signal a UR return event.
@@ -435,7 +491,7 @@ with extra commands associated with *CB*, and the other after *CB*. These new
435491
command-lists are retrieved from the UR queue, which will likely reuse existing
436492
command-lists and only create a new one in the worst case.
437493

438-
#### Drawbacks
494+
##### Drawbacks
439495

440496
There are three drawbacks of this approach to implementing UR command-buffers for
441497
Level Zero:
138 KB
Loading

sycl/test-e2e/AddressSanitizer/bad-free/bad-free-host.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// REQUIRES: linux
1+
// REQUIRES: linux, cpu || (gpu && level_zero)
22
// RUN: %{build} %device_asan_flags -O0 -g -o %t
33
// RUN: %force_device_asan_rt %{run} not %t 2>&1 | FileCheck %s
44
#include <sycl/usm.hpp>

sycl/test-e2e/AddressSanitizer/bad-free/bad-free-minus1.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// REQUIRES: linux
1+
// REQUIRES: linux, cpu || (gpu && level_zero)
22
// RUN: %{build} %device_asan_flags -O0 -g -o %t1.out
33
// RUN: %force_device_asan_rt %{run} not %t1.out 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s
44
// RUN: %{build} %device_asan_flags -DMALLOC_HOST -O0 -g -o %t2.out

sycl/test-e2e/AddressSanitizer/bad-free/bad-free-plus1.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// REQUIRES: linux
1+
// REQUIRES: linux, cpu || (gpu && level_zero)
22
// RUN: %{build} %device_asan_flags -O0 -g -o %t1.out
33
// RUN: %force_device_asan_rt %{run} not %t1.out 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s
44
// RUN: %{build} %device_asan_flags -DMALLOC_HOST -O0 -g -o %t2.out

sycl/test-e2e/AddressSanitizer/common/demangle-kernel-name.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// REQUIRES: linux
1+
// REQUIRES: linux, cpu || (gpu && level_zero)
22
// RUN: %{build} %device_asan_flags -O2 -g -o %t
33
// RUN: %{run} not %t &> %t.txt ; FileCheck --input-file %t.txt %s
44
#include <sycl/detail/core.hpp>

sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp renamed to sycl/test-e2e/AddressSanitizer/common/options-debug.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
// REQUIRES: linux
1+
// REQUIRES: linux, cpu || (gpu && level_zero)
22
// RUN: %{build} %device_asan_flags -O2 -g -o %t
3-
// RUN: env UR_LAYER_ASAN_OPTIONS=debug:1 %{run} %t 2>&1 | FileCheck --check-prefixes CHECK-DEBUG %s
43
// RUN: env UR_LAYER_ASAN_OPTIONS=debug:0 %{run} %t 2>&1 | FileCheck %s
54

65
#include <sycl/usm.hpp>
@@ -23,7 +22,6 @@ int main() {
2322
});
2423
});
2524
Q.wait();
26-
// CHECK-DEBUG: [kernel]
2725
// CHECK-NOT: [kernel]
2826

2927
sycl::free(array, Q);

0 commit comments

Comments
 (0)