Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1977,14 +1977,20 @@ Removing this restriction is something we may look at for future revisions of
The command submission functions defined in
link:../experimental/sycl_ext_oneapi_enqueue_functions.asciidoc[sycl_ext_oneapi_enqueue_functions]
can be used adding nodes to a graph when creating a graph from queue recording.
New methods are also defined that enable submitting an executable graph,
New methods are also defined that enable submitting an executable graph,
e.g. directly to a queue without returning an event.

==== sycl_ext_oneapi_free_function_kernels

`sycl_ext_oneapi_free_function_kernels`, defined in
`sycl_ext_oneapi_free_function_kernels`, defined in
link:../proposed/sycl_ext_oneapi_free_function_kernels.asciidoc[sycl_ext_oneapi_free_function_kernels]
can be used with SYCL Graphs.
can be used with SYCL Graphs.

==== sycl_ext_oneapi_work_group_memory

Using the `work_group_memory` object defined in
link:../experimental/sycl_ext_oneapi_work_group_memory.asciidoc[sycl_ext_oneapi_work_group_memory]
inside graph kernel nodes is supported.

== Examples and Usage Guide

Expand Down
10 changes: 10 additions & 0 deletions sycl/test-e2e/Graph/Explicit/work_group_memory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/work_group_memory.cpp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

// XFAIL: cuda
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/work_group_memory_free_function.cpp"
2 changes: 1 addition & 1 deletion sycl/test-e2e/Graph/Inputs/free_function_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ int main() {
kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
kernel_id Kernel_id = exp_ext::get_kernel_id<ff_0>();
kernel Kernel = Bundle.get_kernel(Kernel_id);
auto KernelNode = Graph.add([&](handler &cgh) {
auto KernelNode = add_node(Graph, Queue, [&](handler &cgh) {
cgh.set_arg(0, PtrA);
cgh.single_task(Kernel);
});
Expand Down
43 changes: 43 additions & 0 deletions sycl/test-e2e/Graph/Inputs/work_group_memory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Tests using sycl_ext_oneapi_work_group_memory in a graph node

#include "../graph_common.hpp"
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>

int main() {
queue Queue;
exp_ext::command_graph Graph{Queue};

std::vector<int> HostData(Size);
std::iota(HostData.begin(), HostData.end(), 10);

int *Ptr = malloc_device<int>(Size, Queue);
Queue.copy(HostData.data(), Ptr, Size).wait();

const size_t LocalSize = 128;
auto node = add_node(Graph, Queue, [&](handler &CGH) {
exp_ext::work_group_memory<int[]> WGMem{LocalSize, CGH};

CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
WGMem[Item.get_local_linear_id()] = Item.get_global_linear_id() * 2;
Ptr[Item.get_global_linear_id()] += WGMem[Item.get_local_linear_id()];
});
});

auto GraphExec = Graph.finalize();

for (unsigned N = 0; N < Iterations; N++) {
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
}
Queue.wait_and_throw();

Queue.copy(Ptr, HostData.data(), Size);
Queue.wait_and_throw();

for (size_t i = 0; i < Size; i++) {
int Ref = 10 + i + (Iterations * (i * 2));
assert(check_value(i, Ref, HostData[i], "Ptr"));
}

free(Ptr, Queue);
return 0;
}
64 changes: 64 additions & 0 deletions sycl/test-e2e/Graph/Inputs/work_group_memory_free_function.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Tests using sycl_ext_oneapi_work_group_memory in a graph node with
// free functions

#include "../graph_common.hpp"
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
#include <sycl/ext/oneapi/free_function_queries.hpp>

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(exp_ext::nd_range_kernel<1>)
void ff_local_mem(int *Ptr, exp_ext::work_group_memory<int[]> LocalMem) {
const auto WI = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
size_t LocalID = WI.get_local_id();
size_t GlobalID = WI.get_global_id();

LocalMem[LocalID] = GlobalID * 2;
Ptr[GlobalID] += LocalMem[LocalID];
}

int main() {
queue Queue;
exp_ext::command_graph Graph{Queue};

std::vector<int> HostData(Size);
std::iota(HostData.begin(), HostData.end(), 10);

int *Ptr = malloc_device<int>(Size, Queue);
Queue.copy(HostData.data(), Ptr, Size).wait();

const size_t LocalSize = 128;

#ifndef __SYCL_DEVICE_ONLY__
kernel_bundle Bundle =
get_kernel_bundle<bundle_state::executable>(Queue.get_context());
kernel_id Kernel_id = exp_ext::get_kernel_id<ff_local_mem>();
kernel Kernel = Bundle.get_kernel(Kernel_id);

auto node = add_node(Graph, Queue, [&](handler &CGH) {
CGH.set_arg(0, Ptr);

exp_ext::work_group_memory<int[]> WGMem{LocalSize, CGH};
CGH.set_arg(1, WGMem);

nd_range NDRange{{Size}, {LocalSize}};
CGH.parallel_for(NDRange, Kernel);
});

auto GraphExec = Graph.finalize();

for (unsigned N = 0; N < Iterations; N++) {
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
}
Queue.wait_and_throw();

Queue.copy(Ptr, HostData.data(), Size);
Queue.wait_and_throw();

for (size_t i = 0; i < Size; i++) {
int Ref = 10 + i + (Iterations * (i * 2));
assert(check_value(i, Ref, HostData[i], "Ptr"));
}
#endif

free(Ptr, Queue);
return 0;
}
10 changes: 10 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/work_group_memory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/work_group_memory.cpp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

// XFAIL: cuda
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/work_group_memory_free_function.cpp"