Skip to content

Commit 7ddaf7d

Browse files
author
Konrad Kusiak
committed
Added graph tests which use free function extension
1 parent 51d92a3 commit 7ddaf7d

11 files changed

+817
-0
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#include "sycl/ext/oneapi/kernel_properties/properties.hpp"
2+
#include "sycl/kernel_bundle.hpp"
3+
#include <sycl/ext/oneapi/free_function_queries.hpp>
4+
5+
namespace exp_ext = sycl::ext::oneapi::experimental;
6+
using namespace sycl;
7+
8+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
9+
void ff_0(int *ptr, size_t size) {
10+
for (size_t i{0}; i < size; ++i) {
11+
ptr[i] = i;
12+
}
13+
}
14+
15+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
16+
void ff_1(int *ptr, size_t size) {
17+
for (size_t i{0}; i < size; ++i) {
18+
ptr[i] += i;
19+
}
20+
}
21+
22+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
23+
void ff_2(int *ptr, size_t size, size_t numKernelLoops) {
24+
for (size_t j = 0; j < numKernelLoops; j++) {
25+
for (size_t i = 0; i < size; i++) {
26+
ptr[i] += i;
27+
}
28+
}
29+
}
30+
31+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::nd_range_kernel<3>))
32+
void ff_3(int *ptr) {
33+
size_t GlobalID =
34+
ext::oneapi::this_work_item::get_nd_item<3>().get_global_linear_id();
35+
ptr[GlobalID] = GlobalID;
36+
}
37+
38+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::nd_range_kernel<3>))
39+
void ff_4(int *ptr) {
40+
size_t GlobalID =
41+
ext::oneapi::this_work_item::get_nd_item<3>().get_global_linear_id();
42+
ptr[GlobalID] *= 2;
43+
}
44+
45+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::nd_range_kernel<1>))
46+
void ff_5(int *ptrA, int *ptrB, int *ptrC) {
47+
size_t id = ext::oneapi::this_work_item::get_nd_item<1>().get_global_id();
48+
ptrC[id] += ptrA[id] * ptrB[id];
49+
}
50+
51+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
52+
void ff_6(int *ptr, int scalarValue, size_t size) {
53+
for (size_t i{0}; i < size; ++i) {
54+
ptr[i] = scalarValue;
55+
}
56+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
// The name mangling for free function kernels currently does not work with PTX.
9+
// UNSUPPORTED: cuda
10+
11+
// Tests updating a graph node before finalization
12+
13+
#include "../../graph_common.hpp"
14+
#include "free_function_kernels.hpp"
15+
16+
int main() {
17+
queue Queue{};
18+
context ctxt{Queue.get_context()};
19+
20+
const size_t N = 1024;
21+
22+
exp_ext::command_graph Graph{ctxt, Queue.get_device()};
23+
24+
int *PtrA = malloc_device<int>(N, Queue);
25+
int *PtrB = malloc_device<int>(N, Queue);
26+
27+
std::vector<int> HostDataA(N);
28+
std::vector<int> HostDataB(N);
29+
30+
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
31+
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
32+
33+
exp_ext::dynamic_parameter InputParam(Graph, PtrA);
34+
35+
#ifndef __SYCL_DEVICE_ONLY__
36+
kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(ctxt);
37+
kernel_id Kernel_id = exp_ext::get_kernel_id<ff_0>();
38+
kernel Kernel = Bundle.get_kernel(Kernel_id);
39+
auto KernelNode = Graph.add([&](handler &cgh) {
40+
cgh.set_arg(0, InputParam);
41+
cgh.set_arg(1, N);
42+
cgh.single_task(Kernel);
43+
});
44+
// Swap PtrB to be the input
45+
InputParam.update(PtrB);
46+
47+
auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
48+
49+
// Only PtrB should be filled with values
50+
Queue.ext_oneapi_graph(ExecGraph).wait();
51+
52+
Queue.copy(PtrA, HostDataA.data(), N).wait();
53+
Queue.copy(PtrB, HostDataB.data(), N).wait();
54+
for (size_t i = 0; i < N; i++) {
55+
assert(HostDataA[i] == 0);
56+
assert(HostDataB[i] == i);
57+
}
58+
#endif
59+
return 0;
60+
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
// The name mangling for free function kernels currently does not work with PTX.
9+
// UNSUPPORTED: cuda
10+
11+
// Tests creating multiple executable graphs from the same modifiable graph and
12+
// only updating one of them.
13+
14+
#include "../../graph_common.hpp"
15+
#include "free_function_kernels.hpp"
16+
17+
int main() {
18+
queue Queue{};
19+
context ctxt{Queue.get_context()};
20+
21+
const size_t N = 1024;
22+
23+
exp_ext::command_graph Graph{ctxt, Queue.get_device()};
24+
25+
int *PtrA = malloc_device<int>(N, Queue);
26+
int *PtrB = malloc_device<int>(N, Queue);
27+
28+
std::vector<int> HostDataA(N);
29+
std::vector<int> HostDataB(N);
30+
31+
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
32+
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
33+
34+
exp_ext::dynamic_parameter InputParam(Graph, PtrA);
35+
36+
#ifndef __SYCL_DEVICE_ONLY__
37+
kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(ctxt);
38+
kernel_id Kernel_id = exp_ext::get_kernel_id<ff_1>();
39+
kernel Kernel = Bundle.get_kernel(Kernel_id);
40+
auto KernelNode = Graph.add([&](handler &cgh) {
41+
cgh.set_arg(0, InputParam);
42+
cgh.set_arg(1, N);
43+
cgh.single_task(Kernel);
44+
});
45+
46+
auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
47+
auto ExecGraph2 = Graph.finalize(exp_ext::property::graph::updatable{});
48+
49+
// PtrA values should be modified twice
50+
Queue.ext_oneapi_graph(ExecGraph).wait();
51+
Queue.ext_oneapi_graph(ExecGraph2).wait();
52+
53+
Queue.copy(PtrA, HostDataA.data(), N).wait();
54+
Queue.copy(PtrB, HostDataB.data(), N).wait();
55+
for (size_t i = 0; i < N; i++) {
56+
assert(HostDataA[i] == i * 2);
57+
assert(HostDataB[i] == 0);
58+
}
59+
60+
// Swap PtrB to be the input
61+
InputParam.update(PtrB);
62+
// Only update ExecGraph, which should now modify PtrB while ExecGraph2
63+
// modifies PtrA still
64+
ExecGraph.update(KernelNode);
65+
Queue.ext_oneapi_graph(ExecGraph).wait();
66+
Queue.ext_oneapi_graph(ExecGraph2).wait();
67+
68+
Queue.copy(PtrA, HostDataA.data(), N).wait();
69+
Queue.copy(PtrB, HostDataB.data(), N).wait();
70+
for (size_t i = 0; i < N; i++) {
71+
// A should have been modified 3 times by now, B only once
72+
assert(HostDataA[i] == i * 3);
73+
assert(HostDataB[i] == i);
74+
}
75+
#endif
76+
return 0;
77+
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
// The name mangling for free function kernels currently does not work with PTX.
9+
// UNSUPPORTED: cuda
10+
11+
// Tests that updating a graph is ordered with respect to previous executions of
12+
// the graph which may be in flight.
13+
14+
#include "../../graph_common.hpp"
15+
#include "free_function_kernels.hpp"
16+
17+
int main() {
18+
queue Queue{};
19+
context ctxt{Queue.get_context()};
20+
21+
// Use a large N to try and make the kernel slow
22+
const size_t N = 1 << 16;
23+
// Loop inside kernel to make even slower (too large N runs out of memory)
24+
const size_t NumKernelLoops = 4;
25+
const size_t NumSubmitLoops = 8;
26+
27+
exp_ext::command_graph Graph{ctxt, Queue.get_device()};
28+
29+
int *PtrA = malloc_device<int>(N, Queue);
30+
int *PtrB = malloc_device<int>(N, Queue);
31+
32+
std::vector<int> HostDataA(N);
33+
std::vector<int> HostDataB(N);
34+
35+
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
36+
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
37+
38+
exp_ext::dynamic_parameter InputParam(Graph, PtrA);
39+
40+
#ifndef __SYCL_DEVICE_ONLY__
41+
kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(ctxt);
42+
kernel_id Kernel_id = exp_ext::get_kernel_id<ff_2>();
43+
kernel Kernel = Bundle.get_kernel(Kernel_id);
44+
auto KernelNode = Graph.add([&](handler &cgh) {
45+
cgh.set_arg(0, InputParam);
46+
cgh.set_arg(1, N);
47+
cgh.set_arg(2, NumKernelLoops);
48+
cgh.single_task(Kernel);
49+
});
50+
51+
auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
52+
53+
// Submit a bunch of graphs without waiting
54+
for (size_t i = 0; i < NumSubmitLoops; i++) {
55+
Queue.ext_oneapi_graph(ExecGraph);
56+
}
57+
58+
// Swap PtrB to be the input
59+
InputParam.update(PtrB);
60+
61+
ExecGraph.update(KernelNode);
62+
63+
// Submit another set of graphs then wait on all submissions
64+
for (size_t i = 0; i < NumSubmitLoops; i++) {
65+
Queue.ext_oneapi_graph(ExecGraph);
66+
}
67+
Queue.wait_and_throw();
68+
69+
Queue.copy(PtrA, HostDataA.data(), N).wait();
70+
Queue.copy(PtrB, HostDataB.data(), N).wait();
71+
for (size_t i = 0; i < N; i++) {
72+
assert(HostDataA[i] == i * NumKernelLoops * NumSubmitLoops);
73+
assert(HostDataB[i] == i * NumKernelLoops * NumSubmitLoops);
74+
}
75+
#endif
76+
return 0;
77+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
// The name mangling for free function kernels currently does not work with PTX.
9+
// UNSUPPORTED: cuda
10+
11+
// Tests updating a graph node using index-based explicit update
12+
13+
#include "../../graph_common.hpp"
14+
#include "free_function_kernels.hpp"
15+
16+
int main() {
17+
queue Queue{};
18+
context ctxt{Queue.get_context()};
19+
20+
const size_t N = 1024;
21+
22+
exp_ext::command_graph Graph{ctxt, Queue.get_device()};
23+
24+
int *PtrA = malloc_device<int>(N, Queue);
25+
int *PtrB = malloc_device<int>(N, Queue);
26+
27+
std::vector<int> HostDataA(N);
28+
std::vector<int> HostDataB(N);
29+
30+
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
31+
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
32+
33+
exp_ext::dynamic_parameter InputParam(Graph, PtrA);
34+
35+
#ifndef __SYCL_DEVICE_ONLY__
36+
kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(ctxt);
37+
kernel_id Kernel_id = exp_ext::get_kernel_id<ff_0>();
38+
kernel Kernel = Bundle.get_kernel(Kernel_id);
39+
auto KernelNode = Graph.add([&](handler &cgh) {
40+
cgh.set_arg(0, InputParam);
41+
cgh.set_arg(0, N);
42+
cgh.single_task(Kernel);
43+
});
44+
45+
auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
46+
47+
// PtrA should be filled with values
48+
Queue.ext_oneapi_graph(ExecGraph).wait();
49+
50+
Queue.copy(PtrA, HostDataA.data(), N).wait();
51+
Queue.copy(PtrB, HostDataB.data(), N).wait();
52+
for (size_t i = 0; i < N; i++) {
53+
assert(HostDataA[i] == i);
54+
assert(HostDataB[i] == 0);
55+
}
56+
57+
// Swap PtrB to be the input
58+
InputParam.update(PtrB);
59+
ExecGraph.update(KernelNode);
60+
Queue.ext_oneapi_graph(ExecGraph).wait();
61+
62+
Queue.copy(PtrA, HostDataA.data(), N).wait();
63+
Queue.copy(PtrB, HostDataB.data(), N).wait();
64+
for (size_t i = 0; i < N; i++) {
65+
assert(HostDataA[i] == i);
66+
assert(HostDataB[i] == i);
67+
}
68+
#endif
69+
return 0;
70+
}

0 commit comments

Comments
 (0)