Skip to content

Commit c8ebd92

Browse files
author
Fábio
authored
[SYCL][Graph] Add e2e test for launching an l0 kernel in a host_task (#15170)
Adds an e2e test that checks that launching a kernel using level-zero native handles in a graph's host_task works as expected.
1 parent b24f1e3 commit c8ebd92

File tree

5 files changed

+229
-0
lines changed

5 files changed

+229
-0
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// REQUIRES: level_zero, level_zero_dev_kit
2+
// L0 plugin incorrectly reports memory leaks because it doesn't take into
3+
// account direct calls to the L0 API.
4+
// UNSUPPORTED: ze_debug
5+
// RUN: %{build} %level_zero_options -o %t.out
6+
// RUN: %{run} %t.out %S/../Inputs/Kernels/saxpy.spv
7+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
8+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
9+
// Extra run to check for immediate-command-list in Level Zero
10+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
11+
12+
#define GRAPH_E2E_EXPLICIT
13+
14+
#include "../Inputs/interop-level-zero-launch-kernel.cpp"
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Source for saxpy.spv
2+
// Compiled using dpcpp: clang++ saxpy.cpp -fsycl -o saxpy.cpp.out
3+
// Extracted using: clang-offload-extract saxpy.cpp.out
4+
5+
#include <sycl/detail/core.hpp>
6+
#include <sycl/usm.hpp>
7+
8+
int main() {
9+
size_t array_size = 16;
10+
11+
sycl::queue sycl_queue;
12+
uint32_t *X = sycl::malloc_device<uint32_t>(array_size, sycl_queue);
13+
uint32_t *Z = sycl::malloc_device<uint32_t>(array_size, sycl_queue);
14+
15+
sycl_queue.submit([&](sycl::handler &cgh) {
16+
cgh.parallel_for<class saxpy>(sycl::range<1>{array_size},
17+
[=](sycl::item<1> itemId) {
18+
constexpr uint32_t A = 2;
19+
Z[itemId] = X[itemId] * A + Z[itemId];
20+
});
21+
});
22+
return 0;
23+
}
6.33 KB
Binary file not shown.
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
// Test that launching a kernel using level-zero interop in a graph's host_task
2+
// works as expected.
3+
4+
#include "../graph_common.hpp"
5+
#include <level_zero/ze_api.h>
6+
#include <sycl/ext/oneapi/backend/level_zero.hpp>
7+
#include <sycl/interop_handle.hpp>
8+
9+
bool getDevice(device &OutDevice, backend Backend) {
10+
auto Platforms = platform::get_platforms();
11+
platform L0Platform;
12+
for (auto &Platform : Platforms) {
13+
if (Platform.get_backend() == Backend) {
14+
L0Platform = Platform;
15+
}
16+
}
17+
18+
auto Devices = L0Platform.get_devices();
19+
for (auto &Device : Devices) {
20+
if (Device.get_backend() == Backend) {
21+
OutDevice = Device;
22+
return true;
23+
}
24+
}
25+
return false;
26+
}
27+
28+
std::vector<uint8_t> loadSpirvFromFile(std::string FileName) {
29+
std::ifstream SpvStream(FileName, std::ios::binary);
30+
SpvStream.seekg(0, std::ios::end);
31+
size_t sz = SpvStream.tellg();
32+
SpvStream.seekg(0);
33+
std::vector<uint8_t> Spv(sz);
34+
SpvStream.read(reinterpret_cast<char *>(Spv.data()), sz);
35+
36+
return Spv;
37+
}
38+
39+
int main(int, char **argv) {
40+
41+
device Device;
42+
if (!getDevice(Device, backend::ext_oneapi_level_zero)) {
43+
// No suitable device found.
44+
return 0;
45+
}
46+
47+
std::vector<uint8_t> Spirv = loadSpirvFromFile(argv[1]);
48+
49+
const sycl::context Context{Device};
50+
queue Queue{Context, Device};
51+
52+
std::vector<uint32_t> HostZ(Size);
53+
std::vector<uint32_t> HostX(Size);
54+
std::vector<uint32_t> ReferenceZ(Size);
55+
std::vector<uint32_t> ReferenceX(Size);
56+
57+
std::iota(HostZ.begin(), HostZ.end(), 1);
58+
std::iota(HostX.begin(), HostX.end(), 10);
59+
60+
for (int i = 0; i < Size; ++i) {
61+
ReferenceZ[i] = HostX[i] * 2 + HostZ[i];
62+
ReferenceX[i] = HostX[i];
63+
}
64+
65+
uint32_t *MemZ = malloc_device<uint32_t>(Size, Queue);
66+
uint32_t *MemX = malloc_device<uint32_t>(Size, Queue);
67+
68+
exp_ext::command_graph Graph{Context, Device};
69+
70+
auto NodeA = add_node(
71+
Graph, Queue, [&](handler &CGH) { CGH.copy(HostZ.data(), MemZ, Size); });
72+
73+
auto NodeB = add_node(
74+
Graph, Queue, [&](handler &CGH) { CGH.copy(HostX.data(), MemX, Size); });
75+
76+
auto NodeC = add_node(
77+
Graph, Queue,
78+
[&](handler &CGH) {
79+
depends_on_helper(CGH, {NodeA, NodeB});
80+
CGH.host_task([&]() {
81+
auto ZeContext = get_native<backend::ext_oneapi_level_zero>(Context);
82+
auto ZeDevice = get_native<backend::ext_oneapi_level_zero>(Device);
83+
84+
ze_result_t status;
85+
ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC,
86+
nullptr,
87+
ZE_MODULE_FORMAT_IL_SPIRV,
88+
Spirv.size(),
89+
Spirv.data(),
90+
nullptr,
91+
nullptr};
92+
ze_module_handle_t ZeModule;
93+
status = zeModuleCreate(ZeContext, ZeDevice, &moduleDesc, &ZeModule,
94+
nullptr);
95+
assert(status == ZE_RESULT_SUCCESS);
96+
97+
ze_kernel_desc_t kernelDesc = {
98+
ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, 0,
99+
"_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E5saxpy"};
100+
ze_kernel_handle_t ZeKernel;
101+
status = zeKernelCreate(ZeModule, &kernelDesc, &ZeKernel);
102+
assert(status == ZE_RESULT_SUCCESS);
103+
104+
auto ZeCommandQueueDesc =
105+
ze_command_queue_desc_t{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
106+
nullptr,
107+
0,
108+
0,
109+
0,
110+
ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS,
111+
ZE_COMMAND_QUEUE_PRIORITY_NORMAL};
112+
113+
ze_command_list_handle_t ZeCommandList;
114+
status = zeCommandListCreateImmediate(
115+
ZeContext, ZeDevice, &ZeCommandQueueDesc, &ZeCommandList);
116+
assert(status == ZE_RESULT_SUCCESS);
117+
118+
status = zeKernelSetArgumentValue(ZeKernel, 0,
119+
Size * sizeof(uint32_t), &MemZ);
120+
assert(status == ZE_RESULT_SUCCESS);
121+
status = zeKernelSetArgumentValue(ZeKernel, 1,
122+
Size * sizeof(uint32_t), &MemX);
123+
assert(status == ZE_RESULT_SUCCESS);
124+
ze_group_count_t ZeGroupCount{Size, 1, 1};
125+
126+
zeKernelSetGroupSize(ZeKernel, 1024, 1, 1);
127+
assert(status == ZE_RESULT_SUCCESS);
128+
129+
status = zeCommandListAppendLaunchKernel(
130+
ZeCommandList, ZeKernel, &ZeGroupCount, nullptr, 0, nullptr);
131+
132+
assert(status == ZE_RESULT_SUCCESS);
133+
134+
status = zeCommandListHostSynchronize(ZeCommandList, 0);
135+
assert(status == ZE_RESULT_SUCCESS);
136+
137+
status = zeCommandListDestroy(ZeCommandList);
138+
assert(status == ZE_RESULT_SUCCESS);
139+
140+
status = zeKernelDestroy(ZeKernel);
141+
assert(status == ZE_RESULT_SUCCESS);
142+
143+
status = zeModuleDestroy(ZeModule);
144+
assert(status == ZE_RESULT_SUCCESS);
145+
});
146+
},
147+
NodeA, NodeB);
148+
149+
auto NodeD = add_node(
150+
Graph, Queue,
151+
[&](handler &CGH) {
152+
depends_on_helper(CGH, NodeC);
153+
CGH.copy(MemZ, HostZ.data(), Size);
154+
},
155+
NodeC);
156+
157+
auto NodeE = add_node(
158+
Graph, Queue,
159+
[&](handler &CGH) {
160+
depends_on_helper(CGH, NodeC);
161+
CGH.copy(MemX, HostX.data(), Size);
162+
},
163+
NodeC);
164+
165+
auto GraphExec = Graph.finalize();
166+
Queue.ext_oneapi_graph(GraphExec);
167+
Queue.wait_and_throw();
168+
169+
sycl::free(MemZ, Context);
170+
sycl::free(MemX, Context);
171+
172+
for (uint32_t i = 0; i < Size; ++i) {
173+
assert(check_value(i, ReferenceZ[i], HostZ[i], "HostZ"));
174+
assert(check_value(i, ReferenceX[i], HostX[i], "HostX"));
175+
}
176+
177+
return 0;
178+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// REQUIRES: level_zero, level_zero_dev_kit
2+
// L0 plugin incorrectly reports memory leaks because it doesn't take into
3+
// account direct calls to the L0 API.
4+
// UNSUPPORTED: ze_debug
5+
// RUN: %{build} %level_zero_options -o %t.out
6+
// RUN: %{run} %t.out %S/../Inputs/Kernels/saxpy.spv
7+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
8+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
9+
// Extra run to check for immediate-command-list in Level Zero
10+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/saxpy.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
11+
12+
#define GRAPH_E2E_RECORD_REPLAY
13+
14+
#include "../Inputs/interop-level-zero-launch-kernel.cpp"

0 commit comments

Comments
 (0)