Skip to content

Commit f8b0dcb

Browse files
Bensuo authored and
Ewan Crawford committed
[SYCL][Graph] Fix and add E2E tests for using local accessors in graphs
- Update UR tag for fix to updating local accessors on CUDA/HIP
- Add e2e tests covering local accessor usage
1 parent d130196 commit f8b0dcb

File tree

8 files changed

+189
-2
lines changed

8 files changed

+189
-2
lines changed

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
116116
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
117117
endfunction()
118118

119-
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
119+
set(UNIFIED_RUNTIME_REPO "https://github.com/bensuo/unified-runtime.git")
120120
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)
121121

122122
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")

sycl/cmake/modules/UnifiedRuntimeTag.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
# Date: Thu Oct 31 14:05:55 2024 +0000
55
# Merge pull request #2228 from nrspruit/copy_engine_refactor
66
# [L0] Refactor Copy Engine Usage checks for Performance
7-
set(UNIFIED_RUNTIME_TAG 3d58884b4939d9bd095c917f8dd823ac8486684c)
7+
set(UNIFIED_RUNTIME_TAG b7d78ba6de853103e4bb6c8dddfe43ad3e65b3a9)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_EXPLICIT
9+
10+
#include "../Inputs/local_accessor.cpp"
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Tests basic adding of nodes with local accessors,
2+
// and submission of the graph.
3+
4+
#include "../graph_common.hpp"
5+
6+
int main() {
7+
queue Queue{};
8+
9+
using T = int;
10+
11+
const size_t LocalSize = 128;
12+
13+
std::vector<T> DataA(Size), DataB(Size), DataC(Size);
14+
15+
std::iota(DataA.begin(), DataA.end(), 10);
16+
17+
std::vector<T> ReferenceA(DataA);
18+
19+
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
20+
21+
T *PtrA = malloc_device<T>(Size, Queue);
22+
23+
Queue.copy(DataA.data(), PtrA, Size);
24+
Queue.wait_and_throw();
25+
26+
auto node = add_node(Graph, Queue, [&](handler &CGH) {
27+
local_accessor<T, 1> localMem(LocalSize, CGH);
28+
29+
CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
30+
localMem[Item.get_local_linear_id()] = Item.get_global_linear_id() * 2;
31+
PtrA[Item.get_global_linear_id()] += localMem[Item.get_local_linear_id()];
32+
});
33+
});
34+
35+
auto GraphExec = Graph.finalize();
36+
37+
for (unsigned n = 0; n < Iterations; n++) {
38+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
39+
}
40+
41+
Queue.wait_and_throw();
42+
43+
Queue.copy(PtrA, DataA.data(), Size);
44+
Queue.wait_and_throw();
45+
46+
free(PtrA, Queue);
47+
48+
for (size_t i = 0; i < Size; i++) {
49+
T Ref = 10 + i + (i * 2);
50+
(check_value(i, Ref, ReferenceA[i], "PtrA"));
51+
}
52+
53+
return 0;
54+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Tests whole graph update of nodes with local accessors,
2+
// and submission of the graph.
3+
4+
#include "../graph_common.hpp"
5+
6+
using T = int;
7+
8+
// Adds one nd_range kernel node to `Graph` via add_node and returns whatever
// add_node returns.
//
// The kernel runs over a global range of `Size` with work-groups of
// `LocalSize`, using a local accessor of `LocalSize` elements. Every
// work-item stores (global id * 2) in its own local slot and then adds that
// value plus the work-group size to `Ptr[global id]`.
auto add_graph_node(
    exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
    queue &Queue, size_t Size, size_t LocalSize, T *Ptr) {
  return add_node(Graph, Queue, [&](handler &CGH) {
    local_accessor<T, 1> Scratch(LocalSize, CGH);

    CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
      const auto LocalId = Item.get_local_linear_id();
      const auto GlobalId = Item.get_global_linear_id();

      Scratch[LocalId] = GlobalId * 2;
      Ptr[GlobalId] += Scratch[LocalId] + Item.get_local_range(0);
    });
  });
}
21+
int main() {
22+
queue Queue{};
23+
24+
const size_t LocalSize = 128;
25+
26+
std::vector<T> DataA(Size), DataB(Size);
27+
28+
std::iota(DataA.begin(), DataA.end(), 10);
29+
std::iota(DataB.begin(), DataB.end(), 10);
30+
31+
std::vector<T> ReferenceA(DataA), ReferenceB(DataB);
32+
33+
exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()};
34+
35+
T *PtrA = malloc_device<T>(Size, Queue);
36+
T *PtrB = malloc_device<T>(Size, Queue);
37+
38+
Queue.copy(DataA.data(), PtrA, Size);
39+
Queue.copy(DataB.data(), PtrB, Size);
40+
Queue.wait_and_throw();
41+
42+
auto NodeA = add_graph_node(GraphA, Queue, Size, LocalSize / 2, PtrA);
43+
44+
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
45+
46+
// Create second graph for whole graph update with a different local size
47+
exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()};
48+
auto NodeB = add_graph_node(GraphB, Queue, Size, LocalSize, PtrB);
49+
50+
// Execute graphs before updating and check outputs
51+
for (unsigned n = 0; n < Iterations; n++) {
52+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExecA); });
53+
}
54+
55+
Queue.wait_and_throw();
56+
57+
Queue.copy(PtrA, DataA.data(), Size);
58+
Queue.copy(PtrB, DataB.data(), Size);
59+
Queue.wait_and_throw();
60+
61+
for (size_t i = 0; i < Size; i++) {
62+
T RefA = 10 + i + (i * 2) + LocalSize / 2;
63+
T RefB = 10 + i;
64+
(check_value(i, RefA, ReferenceA[i], "PtrA"));
65+
(check_value(i, RefB, ReferenceB[i], "PtrB"));
66+
}
67+
68+
// Update GraphExecA using whole graph update
69+
70+
GraphExecA.update(GraphB);
71+
72+
// Execute graphs again and check outputs
73+
for (unsigned n = 0; n < Iterations; n++) {
74+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExecA); });
75+
}
76+
77+
Queue.wait_and_throw();
78+
79+
Queue.copy(PtrA, DataA.data(), Size);
80+
Queue.copy(PtrB, DataB.data(), Size);
81+
Queue.wait_and_throw();
82+
83+
for (size_t i = 0; i < Size; i++) {
84+
T RefA = 10 + i + (i * 2) + LocalSize / 2;
85+
T RefB = 10 + i + (i * 2) + LocalSize;
86+
(check_value(i, RefA, ReferenceA[i], "PtrA"));
87+
(check_value(i, RefB, ReferenceB[i], "PtrB"));
88+
}
89+
90+
free(PtrA, Queue);
91+
free(PtrB, Queue);
92+
return 0;
93+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_RECORD_REPLAY
9+
10+
#include "../Inputs/local_accessor.cpp"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_EXPLICIT
9+
10+
#include "../../Inputs/whole_update_local_acc.cpp"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_RECORD_REPLAY
9+
10+
#include "../../Inputs/whole_update_local_acc.cpp"

0 commit comments

Comments
 (0)