diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
index b81556a92ec2c..15985fb0cc0b0 100644
--- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake
+++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
@@ -1,7 +1,7 @@
-# commit f01741af022cfe82afcb026b9aa0be251eb6a497
-# Merge: 004d2474 85bb5f62
-# Author: Callum Fare
-# Date: Tue Nov 5 13:39:53 2024 +0000
-# Merge pull request #2260 from nrspruit/refactor_l0_default_init
-# [L0] Refactor to remove default constructor inits
-set(UNIFIED_RUNTIME_TAG f01741af022cfe82afcb026b9aa0be251eb6a497)
+# commit 3edf99755ce2af3b53102a7d8438e0fe969efac3
+# Merge: 5955bad3 0b968661
+# Author: Ross Brunton
+# Date: Wed Nov 6 11:07:29 2024 +0000
+# Merge pull request #2082 from RossBrunton/ross/multiadapt
+# [CI] Add "loader" support to conformance testing
+set(UNIFIED_RUNTIME_TAG 3edf99755ce2af3b53102a7d8438e0fe969efac3)
diff --git a/sycl/test-e2e/Graph/Explicit/local_accessor.cpp b/sycl/test-e2e/Graph/Explicit/local_accessor.cpp
new file mode 100644
index 0000000000000..fbeb2c6a5ef5c
--- /dev/null
+++ b/sycl/test-e2e/Graph/Explicit/local_accessor.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/local_accessor.cpp"
diff --git a/sycl/test-e2e/Graph/Inputs/local_accessor.cpp b/sycl/test-e2e/Graph/Inputs/local_accessor.cpp
new file mode 100644
index 0000000000000..b3ac9fde67b6e
--- /dev/null
+++ b/sycl/test-e2e/Graph/Inputs/local_accessor.cpp
@@ -0,0 +1,57 @@
+// Tests basic adding of nodes with local accessors,
+// and submission of the graph.
+
+#include "../graph_common.hpp"
+
+#include <cassert>
+
+int main() {
+  queue Queue{};
+
+  using T = int;
+
+  const size_t LocalSize = 128;
+
+  std::vector<T> DataA(Size);
+
+  std::iota(DataA.begin(), DataA.end(), 10);
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  T *PtrA = malloc_device<T>(Size, Queue);
+
+  Queue.copy(DataA.data(), PtrA, Size);
+  Queue.wait_and_throw();
+
+  auto node = add_node(Graph, Queue, [&](handler &CGH) {
+    local_accessor<T, 1> LocalMem(LocalSize, CGH);
+
+    CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
+      LocalMem[Item.get_local_linear_id()] = Item.get_global_linear_id() * 2;
+      PtrA[Item.get_global_linear_id()] += LocalMem[Item.get_local_linear_id()];
+    });
+  });
+
+  auto GraphExec = Graph.finalize();
+
+  for (unsigned n = 0; n < Iterations; n++) {
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
+  }
+
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrA, DataA.data(), Size);
+  Queue.wait_and_throw();
+
+  free(PtrA, Queue);
+
+  // The graph is replayed Iterations times and every replay adds
+  // 2 * global_id to the element, so the expected value scales with Iterations.
+  // Compare against the data copied back from the device, not the host copy.
+  for (size_t i = 0; i < Size; i++) {
+    T Ref = 10 + i + Iterations * (i * 2);
+    assert(check_value(i, Ref, DataA[i], "PtrA"));
+  }
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Inputs/whole_update_local_acc.cpp b/sycl/test-e2e/Graph/Inputs/whole_update_local_acc.cpp
new file mode 100644
index 0000000000000..100792a2e4762
--- /dev/null
+++ b/sycl/test-e2e/Graph/Inputs/whole_update_local_acc.cpp
@@ -0,0 +1,101 @@
+// Tests whole graph update of nodes with local accessors,
+// and submission of the graph.
+
+#include "../graph_common.hpp"
+
+#include <cassert>
+
+using T = int;
+
+// Adds a single kernel node to Graph. Each work-item stages 2 * global_id in
+// local memory and then adds that value plus the work-group size to Ptr.
+auto add_graph_node(
+    exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
+    queue &Queue, size_t Size, size_t LocalSize, T *Ptr) {
+  return add_node(Graph, Queue, [&](handler &CGH) {
+    local_accessor<T, 1> LocalMem(LocalSize, CGH);
+
+    CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
+      LocalMem[Item.get_local_linear_id()] = Item.get_global_linear_id() * 2;
+      Ptr[Item.get_global_linear_id()] +=
+          LocalMem[Item.get_local_linear_id()] + Item.get_local_range(0);
+    });
+  });
+}
+
+int main() {
+  queue Queue{};
+
+  const size_t LocalSize = 128;
+
+  std::vector<T> DataA(Size), DataB(Size);
+
+  std::iota(DataA.begin(), DataA.end(), 10);
+  std::iota(DataB.begin(), DataB.end(), 10);
+
+  exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()};
+
+  T *PtrA = malloc_device<T>(Size, Queue);
+  T *PtrB = malloc_device<T>(Size, Queue);
+
+  Queue.copy(DataA.data(), PtrA, Size);
+  Queue.copy(DataB.data(), PtrB, Size);
+  Queue.wait_and_throw();
+
+  auto NodeA = add_graph_node(GraphA, Queue, Size, LocalSize / 2, PtrA);
+
+  auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
+
+  // Create second graph for whole graph update with a different local size
+  exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()};
+  auto NodeB = add_graph_node(GraphB, Queue, Size, LocalSize, PtrB);
+
+  // Execute graphs before updating and check outputs
+  for (unsigned n = 0; n < Iterations; n++) {
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExecA); });
+  }
+
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrA, DataA.data(), Size);
+  Queue.copy(PtrB, DataB.data(), Size);
+  Queue.wait_and_throw();
+
+  // Only PtrA has been touched so far: every replay adds
+  // 2 * global_id + LocalSize / 2 to each of its elements.
+  for (size_t i = 0; i < Size; i++) {
+    T RefA = 10 + i + Iterations * ((i * 2) + LocalSize / 2);
+    T RefB = 10 + i;
+    assert(check_value(i, RefA, DataA[i], "PtrA"));
+    assert(check_value(i, RefB, DataB[i], "PtrB"));
+  }
+
+  // Update GraphExecA using whole graph update
+
+  GraphExecA.update(GraphB);
+
+  // Execute graphs again and check outputs
+  for (unsigned n = 0; n < Iterations; n++) {
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExecA); });
+  }
+
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrA, DataA.data(), Size);
+  Queue.copy(PtrB, DataB.data(), Size);
+  Queue.wait_and_throw();
+
+  // After the update the executable graph runs GraphB's node, so PtrA keeps
+  // its previous contents and PtrB now receives 2 * global_id + LocalSize per
+  // replay.
+  for (size_t i = 0; i < Size; i++) {
+    T RefA = 10 + i + Iterations * ((i * 2) + LocalSize / 2);
+    T RefB = 10 + i + Iterations * ((i * 2) + LocalSize);
+    assert(check_value(i, RefA, DataA[i], "PtrA"));
+    assert(check_value(i, RefB, DataB[i], "PtrB"));
+  }
+
+  free(PtrA, Queue);
+  free(PtrB, Queue);
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/RecordReplay/local_accessor.cpp b/sycl/test-e2e/Graph/RecordReplay/local_accessor.cpp
new file mode 100644
index 0000000000000..245983f67da4a
--- /dev/null
+++ b/sycl/test-e2e/Graph/RecordReplay/local_accessor.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/local_accessor.cpp"
diff --git a/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc.cpp b/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc.cpp
new file mode 100644
index 0000000000000..1db9905457ae7
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../../Inputs/whole_update_local_acc.cpp"
diff --git a/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc.cpp b/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc.cpp
new file mode 100644
index 0000000000000..03645b2f19bfd
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../../Inputs/whole_update_local_acc.cpp"