Skip to content

Commit 0b9fc09

Browse files
author
Ewan Crawford
authored
[SYCL][Graph] Test WGU kernel mismatch (#14379)
We cannot currently update the kernel binary of a node in Whole Graph Update. Rather than silently accepting inconsistent kernel functions, which indicates the graphs aren't topologically identical, throw an error when the kernel types of two nodes are mismatched. This change requires removing the unittest for barrier nodes in Whole Graph Update as the mock infrastructure does not setup the internal `CG` class to the depth required to test working functionality. This functionality is already covered by `test-e2e/Graph/Update/whole_update_barrier_node.cpp`
1 parent af2221f commit 0b9fc09

File tree

4 files changed

+149
-46
lines changed

4 files changed

+149
-46
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,23 +1184,45 @@ void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
11841184
throw sycl::exception(sycl::make_error_code(errc::invalid),
11851185
"Cannot update using a graph with a different "
11861186
"topology. Mismatch found in the number of nodes.");
1187-
} else {
1188-
for (uint32_t i = 0; i < MNodeStorage.size(); ++i) {
1189-
if (MNodeStorage[i]->MSuccessors.size() !=
1190-
GraphImpl->MNodeStorage[i]->MSuccessors.size() ||
1191-
MNodeStorage[i]->MPredecessors.size() !=
1192-
GraphImpl->MNodeStorage[i]->MPredecessors.size()) {
1193-
throw sycl::exception(
1194-
sycl::make_error_code(errc::invalid),
1195-
"Cannot update using a graph with a different topology. Mismatch "
1196-
"found in the number of edges.");
1197-
}
1187+
}
1188+
1189+
for (uint32_t i = 0; i < MNodeStorage.size(); ++i) {
1190+
if (MNodeStorage[i]->MSuccessors.size() !=
1191+
GraphImpl->MNodeStorage[i]->MSuccessors.size() ||
1192+
MNodeStorage[i]->MPredecessors.size() !=
1193+
GraphImpl->MNodeStorage[i]->MPredecessors.size()) {
1194+
throw sycl::exception(
1195+
sycl::make_error_code(errc::invalid),
1196+
"Cannot update using a graph with a different topology. Mismatch "
1197+
"found in the number of edges.");
1198+
}
1199+
if (MNodeStorage[i]->MCGType != GraphImpl->MNodeStorage[i]->MCGType) {
1200+
throw sycl::exception(
1201+
sycl::make_error_code(errc::invalid),
1202+
"Cannot update using a graph with mismatched node types. Each pair "
1203+
"of nodes being updated must have the same type");
1204+
}
11981205

1199-
if (MNodeStorage[i]->MCGType != GraphImpl->MNodeStorage[i]->MCGType) {
1200-
throw sycl::exception(
1201-
sycl::make_error_code(errc::invalid),
1202-
"Cannot update using a graph with mismatched node types. Each pair "
1203-
"of nodes being updated must have the same type");
1206+
if (MNodeStorage[i]->MCGType == sycl::detail::CG::Kernel) {
1207+
sycl::detail::CGExecKernel *TargetCGExec =
1208+
static_cast<sycl::detail::CGExecKernel *>(
1209+
MNodeStorage[i]->MCommandGroup.get());
1210+
const std::string &TargetKernelName = TargetCGExec->getKernelName();
1211+
1212+
sycl::detail::CGExecKernel *SourceCGExec =
1213+
static_cast<sycl::detail::CGExecKernel *>(
1214+
GraphImpl->MNodeStorage[i]->MCommandGroup.get());
1215+
const std::string &SourceKernelName = SourceCGExec->getKernelName();
1216+
1217+
if (TargetKernelName.compare(SourceKernelName) != 0) {
1218+
std::stringstream ErrorStream(
1219+
"Cannot update using a graph with mismatched kernel "
1220+
"types. Source node type ");
1221+
ErrorStream << SourceKernelName;
1222+
ErrorStream << ", target node type ";
1223+
ErrorStream << TargetKernelName;
1224+
throw sycl::exception(sycl::make_error_code(errc::invalid),
1225+
ErrorStream.str());
12041226
}
12051227
}
12061228
}

sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,15 @@ int main() {
3636
exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()};
3737

3838
exp_ext::dynamic_parameter InputParam(GraphA, InputDataDevice1);
39+
auto KernelLambda = [=]() {
40+
for (size_t i = 0; i < Size; i++) {
41+
OutputDataDevice1[i] = InputDataDevice1[i];
42+
}
43+
};
44+
3945
GraphA.add([&](handler &CGH) {
4046
CGH.set_arg(1, InputParam);
41-
CGH.single_task([=]() {
42-
for (size_t i = 0; i < Size; i++) {
43-
OutputDataDevice1[i] = InputDataDevice1[i];
44-
}
45-
});
47+
CGH.single_task(KernelLambda);
4648
});
4749

4850
auto GraphExecA = GraphA.finalize();
@@ -59,13 +61,7 @@ int main() {
5961
InputParam.update(InputDataDevice2);
6062
exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()};
6163

62-
GraphB.add([&](handler &CGH) {
63-
CGH.single_task([=]() {
64-
for (size_t i = 0; i < Size; i++) {
65-
OutputDataDevice1[i] = InputDataDevice1[i];
66-
}
67-
});
68-
});
64+
GraphB.add([&](handler &CGH) { CGH.single_task(KernelLambda); });
6965

7066
auto GraphExecB = GraphB.finalize(exp_ext::property::graph::updatable{});
7167
GraphExecB.update(GraphA);
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// Test that an error is thrown when the types of kernels do not match in Whole
9+
// Graph Update
10+
11+
#include "../graph_common.hpp"
12+
13+
void testFunctors(queue Queue, int *Data) {
14+
exp_ext::command_graph Graph{Queue};
15+
exp_ext::command_graph UpdateGraph{Queue};
16+
struct KernelFunctorA {
17+
KernelFunctorA(int *Data) : Data(Data) {}
18+
19+
void operator()() const { Data[0] = 42; }
20+
21+
int *Data;
22+
};
23+
24+
struct KernelFunctorB {
25+
KernelFunctorB(int *Data) : Data(Data) {}
26+
void operator()() const { Data[0] = 42; }
27+
28+
int *Data;
29+
};
30+
31+
Graph.add([&](handler &CGH) { CGH.single_task(KernelFunctorA{Data}); });
32+
33+
UpdateGraph.add([&](handler &CGH) { CGH.single_task(KernelFunctorB{Data}); });
34+
35+
auto GraphExec = Graph.finalize(exp_ext::property::graph::updatable{});
36+
37+
// Check it's an error if kernel types don't match
38+
std::error_code ErrorCode = make_error_code(sycl::errc::success);
39+
try {
40+
GraphExec.update(UpdateGraph);
41+
} catch (const sycl::exception &e) {
42+
ErrorCode = e.code();
43+
}
44+
assert(ErrorCode == sycl::errc::invalid);
45+
}
46+
47+
void testUnNamedLambdas(queue Queue, int *Data) {
48+
exp_ext::command_graph Graph{Queue};
49+
exp_ext::command_graph UpdateGraph{Queue};
50+
51+
Graph.add([&](handler &CGH) { CGH.single_task([=]() { Data[0] = 42; }); });
52+
53+
UpdateGraph.add(
54+
[&](handler &CGH) { CGH.single_task([=]() { Data[0] = 42; }); });
55+
56+
auto GraphExec = Graph.finalize(exp_ext::property::graph::updatable{});
57+
58+
// Check it's an error if kernel types don't match
59+
std::error_code ErrorCode = make_error_code(sycl::errc::success);
60+
try {
61+
GraphExec.update(UpdateGraph);
62+
} catch (const sycl::exception &e) {
63+
ErrorCode = e.code();
64+
}
65+
assert(ErrorCode == sycl::errc::invalid);
66+
}
67+
void testNamedLambdas(queue Queue, int *Data) {
68+
exp_ext::command_graph Graph{Queue};
69+
exp_ext::command_graph UpdateGraph{Queue};
70+
71+
auto LambdaA = [=]() { Data[0] = 42; };
72+
73+
Graph.add([&](handler &CGH) { CGH.single_task<class TestLambdaA>(LambdaA); });
74+
75+
auto LambdaB = [=]() { Data[0] = 42; };
76+
77+
UpdateGraph.add(
78+
[&](handler &CGH) { CGH.single_task<class TestLambdaB>(LambdaB); });
79+
80+
auto GraphExec = Graph.finalize(exp_ext::property::graph::updatable{});
81+
82+
// Check it's an error if kernel types don't match
83+
std::error_code ErrorCode = make_error_code(sycl::errc::success);
84+
try {
85+
GraphExec.update(UpdateGraph);
86+
} catch (const sycl::exception &e) {
87+
ErrorCode = e.code();
88+
}
89+
assert(ErrorCode == sycl::errc::invalid);
90+
}
91+
92+
int main() {
93+
queue Queue{};
94+
int *Data = malloc_device<int>(1, Queue);
95+
96+
testNamedLambdas(Queue, Data);
97+
testUnNamedLambdas(Queue, Data);
98+
testFunctors(Queue, Data);
99+
100+
sycl::free(Data, Queue);
101+
102+
return 0;
103+
}

sycl/unittests/Extensions/CommandGraph/Update.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -399,21 +399,3 @@ TEST_F(WholeGraphUpdateTest, EmptyNode) {
399399
auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
400400
GraphExec.update(UpdateGraph);
401401
}
402-
403-
TEST_F(WholeGraphUpdateTest, BarrierNode) {
404-
// Test that updating a graph that has a barrier node is not an error
405-
Graph.begin_recording(Queue);
406-
auto NodeKernel = Queue.submit(
407-
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
408-
Queue.ext_oneapi_submit_barrier({NodeKernel});
409-
Graph.end_recording(Queue);
410-
411-
UpdateGraph.begin_recording(Queue);
412-
auto UpdateNodeKernel = Queue.submit(
413-
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
414-
Queue.ext_oneapi_submit_barrier({UpdateNodeKernel});
415-
UpdateGraph.end_recording(Queue);
416-
417-
auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
418-
GraphExec.update(UpdateGraph);
419-
}

0 commit comments

Comments
 (0)