diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index a5596b6e96158..f20ee0e450c85 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1977,14 +1977,20 @@ Removing this restriction is something we may look at for future revisions of The command submission functions defined in link:../experimental/sycl_ext_oneapi_enqueue_functions.asciidoc[sycl_ext_oneapi_enqueue_functions] can be used adding nodes to a graph when creating a graph from queue recording. -New methods are also defined that enable submitting an executable graph, +New methods are also defined that enable submitting an executable graph, e.g. directly to a queue without returning an event. ==== sycl_ext_oneapi_free_function_kernels -`sycl_ext_oneapi_free_function_kernels`, defined in +`sycl_ext_oneapi_free_function_kernels`, defined in link:../proposed/sycl_ext_oneapi_free_function_kernels.asciidoc[sycl_ext_oneapi_free_function_kernels] -can be used with SYCL Graphs. +can be used with SYCL Graphs. + +==== sycl_ext_oneapi_work_group_memory + +Using the `work_group_memory` object defined in +link:../experimental/sycl_ext_oneapi_work_group_memory.asciidoc[sycl_ext_oneapi_work_group_memory] +inside graph kernel nodes is supported. == Examples and Usage Guide diff --git a/sycl/test-e2e/Graph/Explicit/work_group_memory.cpp b/sycl/test-e2e/Graph/Explicit/work_group_memory.cpp new file mode 100644 index 0000000000000..95ffa01d8d0e1 --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/work_group_memory.cpp @@ -0,0 +1,10 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/work_group_memory.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/work_group_memory_free_function.cpp b/sycl/test-e2e/Graph/Explicit/work_group_memory_free_function.cpp new file mode 100644 index 0000000000000..74e1d83fbc5ad --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/work_group_memory_free_function.cpp @@ -0,0 +1,13 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: cuda +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004 + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/work_group_memory_free_function.cpp" diff --git a/sycl/test-e2e/Graph/Inputs/free_function_kernels.cpp b/sycl/test-e2e/Graph/Inputs/free_function_kernels.cpp index 5a3e9e3304d01..063d5740a8715 100644 --- a/sycl/test-e2e/Graph/Inputs/free_function_kernels.cpp +++ b/sycl/test-e2e/Graph/Inputs/free_function_kernels.cpp @@ -25,7 +25,7 @@ int main() { kernel_bundle Bundle = get_kernel_bundle(Ctxt); kernel_id Kernel_id = exp_ext::get_kernel_id(); kernel Kernel = Bundle.get_kernel(Kernel_id); - auto KernelNode = Graph.add([&](handler &cgh) { + auto KernelNode = add_node(Graph, Queue, [&](handler &cgh) { cgh.set_arg(0, PtrA); cgh.single_task(Kernel); }); diff --git a/sycl/test-e2e/Graph/Inputs/work_group_memory.cpp b/sycl/test-e2e/Graph/Inputs/work_group_memory.cpp new file mode 100644 index 0000000000000..2a2a6f1634a7d --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/work_group_memory.cpp @@ -0,0 +1,43 @@ +// Tests using sycl_ext_oneapi_work_group_memory in a graph node + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue; + exp_ext::command_graph Graph{Queue}; + + std::vector HostData(Size); + std::iota(HostData.begin(), HostData.end(), 10); + + int *Ptr = malloc_device(Size, Queue); + Queue.copy(HostData.data(), Ptr, Size).wait(); + + const size_t LocalSize = 128; + auto node = add_node(Graph, Queue, [&](handler &CGH) { + exp_ext::work_group_memory WGMem{LocalSize, CGH}; + + CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) { + WGMem[Item.get_local_linear_id()] = Item.get_global_linear_id() * 2; + Ptr[Item.get_global_linear_id()] += WGMem[Item.get_local_linear_id()]; + }); + }); + + auto GraphExec = Graph.finalize(); + + for (unsigned N = 0; N < Iterations; N++) { + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + } + Queue.wait_and_throw(); + + Queue.copy(Ptr, HostData.data(), Size); + Queue.wait_and_throw(); + + for (size_t i = 0; i < Size; i++) { + int Ref = 10 + i + (Iterations * (i * 2)); + assert(check_value(i, Ref, HostData[i], "Ptr")); + } + + free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Inputs/work_group_memory_free_function.cpp b/sycl/test-e2e/Graph/Inputs/work_group_memory_free_function.cpp new file mode 100644 index 0000000000000..46edb0299b975 --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/work_group_memory_free_function.cpp @@ -0,0 +1,64 @@ +// Tests using sycl_ext_oneapi_work_group_memory in a graph node with +// free functions + +#include "../graph_common.hpp" +#include +#include + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(exp_ext::nd_range_kernel<1>) +void ff_local_mem(int *Ptr, exp_ext::work_group_memory LocalMem) { + const auto WI = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t LocalID = WI.get_local_id(); + size_t GlobalID = WI.get_global_id(); + + LocalMem[LocalID] = GlobalID * 2; + Ptr[GlobalID] += LocalMem[LocalID]; +} + +int main() { + queue Queue; + exp_ext::command_graph Graph{Queue}; + + std::vector HostData(Size); + std::iota(HostData.begin(), HostData.end(), 10); + + int *Ptr = malloc_device(Size, Queue); + Queue.copy(HostData.data(), Ptr, Size).wait(); + + const size_t LocalSize = 128; + +#ifndef __SYCL_DEVICE_ONLY__ + kernel_bundle Bundle = + get_kernel_bundle(Queue.get_context()); + kernel_id Kernel_id = exp_ext::get_kernel_id(); + kernel Kernel = Bundle.get_kernel(Kernel_id); + + auto node = add_node(Graph, Queue, [&](handler &CGH) { + CGH.set_arg(0, Ptr); + + exp_ext::work_group_memory WGMem{LocalSize, CGH}; + CGH.set_arg(1, WGMem); + + nd_range NDRange{{Size}, {LocalSize}}; + CGH.parallel_for(NDRange, Kernel); + }); + + auto GraphExec = Graph.finalize(); + + for (unsigned N = 0; N < Iterations; N++) { + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + } + Queue.wait_and_throw(); + + Queue.copy(Ptr, HostData.data(), Size); + Queue.wait_and_throw(); + + for (size_t i = 0; i < Size; i++) { + int Ref = 10 + i + (Iterations * (i * 2)); + assert(check_value(i, Ref, HostData[i], "Ptr")); + } +#endif + + free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/RecordReplay/work_group_memory.cpp b/sycl/test-e2e/Graph/RecordReplay/work_group_memory.cpp new file mode 100644 index 0000000000000..2ee08a65a77dc --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/work_group_memory.cpp @@ -0,0 +1,10 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/work_group_memory.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/work_group_memory_free_function.cpp b/sycl/test-e2e/Graph/RecordReplay/work_group_memory_free_function.cpp new file mode 100644 index 0000000000000..9206143567091 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/work_group_memory_free_function.cpp @@ -0,0 +1,13 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: cuda +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004 + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/work_group_memory_free_function.cpp"