Skip to content

Commit 6b12135

Browse files
author
Konrad Kusiak
committed
Added sycl::free, changed to XFAIL and made use of Size in graph_common.hpp
1 parent 7ddaf7d commit 6b12135

12 files changed

+143
-147
lines changed

sycl/test-e2e/Graph/Update/FreeFunctionKernels/free_function_kernels.hpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,18 @@
1+
#include "../../graph_common.hpp"
12
#include "sycl/ext/oneapi/kernel_properties/properties.hpp"
23
#include "sycl/kernel_bundle.hpp"
34
#include <sycl/ext/oneapi/free_function_queries.hpp>
45

5-
namespace exp_ext = sycl::ext::oneapi::experimental;
6-
using namespace sycl;
7-
86
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
9-
void ff_0(int *ptr, size_t size) {
10-
for (size_t i{0}; i < size; ++i) {
7+
void ff_0(int *ptr) {
8+
for (size_t i{0}; i < Size; ++i) {
119
ptr[i] = i;
1210
}
1311
}
1412

1513
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
16-
void ff_1(int *ptr, size_t size) {
17-
for (size_t i{0}; i < size; ++i) {
14+
void ff_1(int *ptr) {
15+
for (size_t i{0}; i < Size; ++i) {
1816
ptr[i] += i;
1917
}
2018
}
@@ -49,8 +47,8 @@ void ff_5(int *ptrA, int *ptrB, int *ptrC) {
4947
}
5048

5149
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
52-
void ff_6(int *ptr, int scalarValue, size_t size) {
53-
for (size_t i{0}; i < size; ++i) {
50+
void ff_6(int *ptr, int scalarValue) {
51+
for (size_t i{0}; i < Size; ++i) {
5452
ptr[i] = scalarValue;
5553
}
5654
}

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_before_finalize.cpp

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77
//
8-
// The name mangling for free function kernels currently does not work with PTX.
9-
// UNSUPPORTED: cuda
8+
// XFAIL: cuda
9+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
1010

1111
// Tests updating a graph node before finalization
1212

@@ -17,18 +17,16 @@ int main() {
1717
queue Queue{};
1818
context ctxt{Queue.get_context()};
1919

20-
const size_t N = 1024;
21-
2220
exp_ext::command_graph Graph{ctxt, Queue.get_device()};
2321

24-
int *PtrA = malloc_device<int>(N, Queue);
25-
int *PtrB = malloc_device<int>(N, Queue);
22+
int *PtrA = malloc_device<int>(Size, Queue);
23+
int *PtrB = malloc_device<int>(Size, Queue);
2624

27-
std::vector<int> HostDataA(N);
28-
std::vector<int> HostDataB(N);
25+
std::vector<int> HostDataA(Size);
26+
std::vector<int> HostDataB(Size);
2927

30-
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
31-
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
28+
Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
29+
Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
3230

3331
exp_ext::dynamic_parameter InputParam(Graph, PtrA);
3432

@@ -38,7 +36,6 @@ int main() {
3836
kernel Kernel = Bundle.get_kernel(Kernel_id);
3937
auto KernelNode = Graph.add([&](handler &cgh) {
4038
cgh.set_arg(0, InputParam);
41-
cgh.set_arg(1, N);
4239
cgh.single_task(Kernel);
4340
});
4441
// Swap PtrB to be the input
@@ -49,12 +46,14 @@ int main() {
4946
// Only PtrB should be filled with values
5047
Queue.ext_oneapi_graph(ExecGraph).wait();
5148

52-
Queue.copy(PtrA, HostDataA.data(), N).wait();
53-
Queue.copy(PtrB, HostDataB.data(), N).wait();
54-
for (size_t i = 0; i < N; i++) {
49+
Queue.copy(PtrA, HostDataA.data(), Size).wait();
50+
Queue.copy(PtrB, HostDataB.data(), Size).wait();
51+
for (size_t i = 0; i < Size; i++) {
5552
assert(HostDataA[i] == 0);
5653
assert(HostDataB[i] == i);
5754
}
55+
sycl::free(PtrA, Queue);
56+
sycl::free(PtrB, Queue);
5857
#endif
5958
return 0;
6059
}

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_multiple_exec_graphs.cpp

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77
//
8-
// The name mangling for free function kernels currently does not work with PTX.
9-
// UNSUPPORTED: cuda
8+
// XFAIL: cuda
9+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
1010

1111
// Tests creating multiple executable graphs from the same modifiable graph and
1212
// only updating one of them.
@@ -18,18 +18,16 @@ int main() {
1818
queue Queue{};
1919
context ctxt{Queue.get_context()};
2020

21-
const size_t N = 1024;
22-
2321
exp_ext::command_graph Graph{ctxt, Queue.get_device()};
2422

25-
int *PtrA = malloc_device<int>(N, Queue);
26-
int *PtrB = malloc_device<int>(N, Queue);
23+
int *PtrA = malloc_device<int>(Size, Queue);
24+
int *PtrB = malloc_device<int>(Size, Queue);
2725

28-
std::vector<int> HostDataA(N);
29-
std::vector<int> HostDataB(N);
26+
std::vector<int> HostDataA(Size);
27+
std::vector<int> HostDataB(Size);
3028

31-
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
32-
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
29+
Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
30+
Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
3331

3432
exp_ext::dynamic_parameter InputParam(Graph, PtrA);
3533

@@ -39,7 +37,6 @@ int main() {
3937
kernel Kernel = Bundle.get_kernel(Kernel_id);
4038
auto KernelNode = Graph.add([&](handler &cgh) {
4139
cgh.set_arg(0, InputParam);
42-
cgh.set_arg(1, N);
4340
cgh.single_task(Kernel);
4441
});
4542

@@ -50,9 +47,9 @@ int main() {
5047
Queue.ext_oneapi_graph(ExecGraph).wait();
5148
Queue.ext_oneapi_graph(ExecGraph2).wait();
5249

53-
Queue.copy(PtrA, HostDataA.data(), N).wait();
54-
Queue.copy(PtrB, HostDataB.data(), N).wait();
55-
for (size_t i = 0; i < N; i++) {
50+
Queue.copy(PtrA, HostDataA.data(), Size).wait();
51+
Queue.copy(PtrB, HostDataB.data(), Size).wait();
52+
for (size_t i = 0; i < Size; i++) {
5653
assert(HostDataA[i] == i * 2);
5754
assert(HostDataB[i] == 0);
5855
}
@@ -65,13 +62,15 @@ int main() {
6562
Queue.ext_oneapi_graph(ExecGraph).wait();
6663
Queue.ext_oneapi_graph(ExecGraph2).wait();
6764

68-
Queue.copy(PtrA, HostDataA.data(), N).wait();
69-
Queue.copy(PtrB, HostDataB.data(), N).wait();
70-
for (size_t i = 0; i < N; i++) {
65+
Queue.copy(PtrA, HostDataA.data(), Size).wait();
66+
Queue.copy(PtrB, HostDataB.data(), Size).wait();
67+
for (size_t i = 0; i < Size; i++) {
7168
// A should have been modified 3 times by now, B only once
7269
assert(HostDataA[i] == i * 3);
7370
assert(HostDataB[i] == i);
7471
}
72+
sycl::free(PtrA, Queue);
73+
sycl::free(PtrB, Queue);
7574
#endif
7675
return 0;
7776
}

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77
//
8-
// The name mangling for free function kernels currently does not work with PTX.
9-
// UNSUPPORTED: cuda
8+
// XFAIL: cuda
9+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
1010

1111
// Tests that updating a graph is ordered with respect to previous executions of
1212
// the graph which may be in flight.
@@ -72,6 +72,8 @@ int main() {
7272
assert(HostDataA[i] == i * NumKernelLoops * NumSubmitLoops);
7373
assert(HostDataB[i] == i * NumKernelLoops * NumSubmitLoops);
7474
}
75+
sycl::free(PtrA, Queue);
76+
sycl::free(PtrB, Queue);
7577
#endif
7678
return 0;
7779
}

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr.cpp

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77
//
8-
// The name mangling for free function kernels currently does not work with PTX.
9-
// UNSUPPORTED: cuda
8+
// XFAIL: cuda
9+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
1010

1111
// Tests updating a graph node using index-based explicit update
1212

@@ -17,18 +17,16 @@ int main() {
1717
queue Queue{};
1818
context ctxt{Queue.get_context()};
1919

20-
const size_t N = 1024;
21-
2220
exp_ext::command_graph Graph{ctxt, Queue.get_device()};
2321

24-
int *PtrA = malloc_device<int>(N, Queue);
25-
int *PtrB = malloc_device<int>(N, Queue);
22+
int *PtrA = malloc_device<int>(Size, Queue);
23+
int *PtrB = malloc_device<int>(Size, Queue);
2624

27-
std::vector<int> HostDataA(N);
28-
std::vector<int> HostDataB(N);
25+
std::vector<int> HostDataA(Size);
26+
std::vector<int> HostDataB(Size);
2927

30-
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
31-
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
28+
Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
29+
Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
3230

3331
exp_ext::dynamic_parameter InputParam(Graph, PtrA);
3432

@@ -38,7 +36,6 @@ int main() {
3836
kernel Kernel = Bundle.get_kernel(Kernel_id);
3937
auto KernelNode = Graph.add([&](handler &cgh) {
4038
cgh.set_arg(0, InputParam);
41-
cgh.set_arg(0, N);
4239
cgh.single_task(Kernel);
4340
});
4441

@@ -47,9 +44,9 @@ int main() {
4744
// PtrA should be filled with values
4845
Queue.ext_oneapi_graph(ExecGraph).wait();
4946

50-
Queue.copy(PtrA, HostDataA.data(), N).wait();
51-
Queue.copy(PtrB, HostDataB.data(), N).wait();
52-
for (size_t i = 0; i < N; i++) {
47+
Queue.copy(PtrA, HostDataA.data(), Size).wait();
48+
Queue.copy(PtrB, HostDataB.data(), Size).wait();
49+
for (size_t i = 0; i < Size; i++) {
5350
assert(HostDataA[i] == i);
5451
assert(HostDataB[i] == 0);
5552
}
@@ -59,12 +56,14 @@ int main() {
5956
ExecGraph.update(KernelNode);
6057
Queue.ext_oneapi_graph(ExecGraph).wait();
6158

62-
Queue.copy(PtrA, HostDataA.data(), N).wait();
63-
Queue.copy(PtrB, HostDataB.data(), N).wait();
64-
for (size_t i = 0; i < N; i++) {
59+
Queue.copy(PtrA, HostDataA.data(), Size).wait();
60+
Queue.copy(PtrB, HostDataB.data(), Size).wait();
61+
for (size_t i = 0; i < Size; i++) {
6562
assert(HostDataA[i] == i);
6663
assert(HostDataB[i] == i);
6764
}
65+
sycl::free(PtrA, Queue);
66+
sycl::free(PtrB, Queue);
6867
#endif
6968
return 0;
7069
}

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_3D.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77
//
8-
// The name mangling for free function kernels currently does not work with PTX.
9-
// UNSUPPORTED: cuda
8+
// XFAIL: cuda
9+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
1010

1111
// Tests updating a 3D ND-Range graph kernel node using index-based explicit
1212
// update
@@ -79,6 +79,8 @@ int main() {
7979
assert(HostDataA[i] == Ref);
8080
assert(HostDataB[i] == Ref);
8181
}
82+
sycl::free(PtrA, Queue);
83+
sycl::free(PtrB, Queue);
8284
#endif
8385
return 0;
8486
}

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_double_update.cpp

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77
//
8-
// The name mangling for free function kernels currently does not work with PTX.
9-
// UNSUPPORTED: cuda
8+
// XFAIL: cuda
9+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
1010

1111
// Tests updating a graph node using index-based explicit update
1212

@@ -17,21 +17,19 @@ int main() {
1717
queue Queue{};
1818
context ctxt{Queue.get_context()};
1919

20-
const size_t N = 1024;
21-
2220
exp_ext::command_graph Graph{ctxt, Queue.get_device()};
2321

24-
int *PtrA = malloc_device<int>(N, Queue);
25-
int *PtrB = malloc_device<int>(N, Queue);
26-
int *PtrUnused = malloc_device<int>(N, Queue);
22+
int *PtrA = malloc_device<int>(Size, Queue);
23+
int *PtrB = malloc_device<int>(Size, Queue);
24+
int *PtrUnused = malloc_device<int>(Size, Queue);
2725

28-
std::vector<int> HostDataA(N);
29-
std::vector<int> HostDataB(N);
30-
std::vector<int> HostDataUnused(N);
26+
std::vector<int> HostDataA(Size);
27+
std::vector<int> HostDataB(Size);
28+
std::vector<int> HostDataUnused(Size);
3129

32-
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
33-
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
34-
Queue.memset(PtrUnused, 0, N * sizeof(int)).wait();
30+
Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
31+
Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
32+
Queue.memset(PtrUnused, 0, Size * sizeof(int)).wait();
3533

3634
exp_ext::dynamic_parameter InputParam(Graph, PtrA);
3735

@@ -41,7 +39,6 @@ int main() {
4139
kernel Kernel = Bundle.get_kernel(Kernel_id);
4240
auto KernelNode = Graph.add([&](handler &cgh) {
4341
cgh.set_arg(0, InputParam);
44-
cgh.set_arg(1, N);
4542
cgh.single_task(Kernel);
4643
});
4744

@@ -50,10 +47,10 @@ int main() {
5047
// PtrA should be filled with values
5148
Queue.ext_oneapi_graph(ExecGraph).wait();
5249

53-
Queue.copy(PtrA, HostDataA.data(), N).wait();
54-
Queue.copy(PtrB, HostDataB.data(), N).wait();
55-
Queue.copy(PtrUnused, HostDataUnused.data(), N).wait();
56-
for (size_t i = 0; i < N; i++) {
50+
Queue.copy(PtrA, HostDataA.data(), Size).wait();
51+
Queue.copy(PtrB, HostDataB.data(), Size).wait();
52+
Queue.copy(PtrUnused, HostDataUnused.data(), Size).wait();
53+
for (size_t i = 0; i < Size; i++) {
5754
assert(HostDataA[i] == i);
5855
assert(HostDataB[i] == 0);
5956
assert(HostDataUnused[i] == 0);
@@ -66,15 +63,18 @@ int main() {
6663
ExecGraph.update(KernelNode);
6764
Queue.ext_oneapi_graph(ExecGraph).wait();
6865

69-
Queue.copy(PtrA, HostDataA.data(), N).wait();
70-
Queue.copy(PtrB, HostDataB.data(), N).wait();
71-
Queue.copy(PtrUnused, HostDataUnused.data(), N).wait();
72-
for (size_t i = 0; i < N; i++) {
66+
Queue.copy(PtrA, HostDataA.data(), Size).wait();
67+
Queue.copy(PtrB, HostDataB.data(), Size).wait();
68+
Queue.copy(PtrUnused, HostDataUnused.data(), Size).wait();
69+
for (size_t i = 0; i < Size; i++) {
7370
assert(HostDataA[i] == i);
7471
assert(HostDataB[i] == i);
7572
// Check that PtrUnused was never actually used in a kernel
7673
assert(HostDataUnused[i] == 0);
7774
}
75+
sycl::free(PtrA, Queue);
76+
sycl::free(PtrB, Queue);
77+
sycl::free(PtrUnused, Queue);
7878
#endif
7979
return 0;
8080
}

0 commit comments

Comments
 (0)