diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc
index 5dff0396f07fb..a5596b6e96158 100644
--- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc
@@ -58,6 +58,7 @@ Jack Kirk, Codeplay +
 Ronan Keryell, AMD +
 Andrey Alekseenko, KTH Royal Institute of Technology +
 Fábio Mestre, Codeplay +
+Konrad Kusiak, Codeplay +
 
 == Dependencies
 
@@ -1979,10 +1980,16 @@ can be used adding nodes to a graph when creating a graph from queue recording.
 New methods are also defined that enable submitting an executable graph, 
 e.g. directly to a queue without returning an event.
 
+==== sycl_ext_oneapi_free_function_kernels
+
+`sycl_ext_oneapi_free_function_kernels`, defined in 
+link:../proposed/sycl_ext_oneapi_free_function_kernels.asciidoc[sycl_ext_oneapi_free_function_kernels]
+can be used with SYCL Graphs. 
+
 == Examples and Usage Guide
 
 Detailed code examples and usage guidelines are provided in the
-link:../../SYCLGraphUsageGuide.md[SYCL Graph Usage Guide].
+link:../../syclgraph/SYCLGraphUsageGuide.md[SYCL Graph Usage Guide].
 
 == Future Direction [[future-direction]]
 
diff --git a/sycl/test-e2e/Graph/Explicit/free_function_kernels.cpp b/sycl/test-e2e/Graph/Explicit/free_function_kernels.cpp
new file mode 100644
index 0000000000000..3784ebbd800a6
--- /dev/null
+++ b/sycl/test-e2e/Graph/Explicit/free_function_kernels.cpp
@@ -0,0 +1,13 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/free_function_kernels.cpp"
diff --git a/sycl/test-e2e/Graph/Inputs/free_function_kernels.cpp b/sycl/test-e2e/Graph/Inputs/free_function_kernels.cpp
new file mode 100644
index 0000000000000..5a3e9e3304d01
--- /dev/null
+++ b/sycl/test-e2e/Graph/Inputs/free_function_kernels.cpp
@@ -0,0 +1,45 @@
+// Tests compatibility with free function kernels extension
+
+#include "../graph_common.hpp"
+
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
+void ff_0(int *Ptr) {
+  for (size_t i{0}; i < Size; ++i) {
+    Ptr[i] = i;
+  }
+}
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+
+  std::vector<int> HostDataA(Size);
+
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_0>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, PtrA);
+    cgh.single_task(Kernel);
+  });
+
+  auto ExecGraph = Graph.finalize();
+
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/RecordReplay/free_function_kernels.cpp b/sycl/test-e2e/Graph/RecordReplay/free_function_kernels.cpp
new file mode 100644
index 0000000000000..ad003e575e044
--- /dev/null
+++ b/sycl/test-e2e/Graph/RecordReplay/free_function_kernels.cpp
@@ -0,0 +1,13 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/free_function_kernels.cpp"
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/free_function_kernels.hpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/free_function_kernels.hpp
new file mode 100644
index 0000000000000..d319d819abdad
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/free_function_kernels.hpp
@@ -0,0 +1,57 @@
+#pragma once
+
+#include "../../graph_common.hpp"
+#include "sycl/ext/oneapi/kernel_properties/properties.hpp"
+#include "sycl/kernel_bundle.hpp"
+#include <sycl/ext/oneapi/free_function_queries.hpp>
+
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
+void ff_0(int *Ptr) {
+  for (size_t i{0}; i < Size; ++i) {
+    Ptr[i] = i;
+  }
+}
+
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
+void ff_1(int *Ptr) {
+  for (size_t i{0}; i < Size; ++i) {
+    Ptr[i] += i;
+  }
+}
+
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
+void ff_2(int *Ptr, size_t Size, size_t NumKernelLoops) {
+  for (size_t j{0}; j < NumKernelLoops; j++) {
+    for (size_t i{0}; i < Size; i++) {
+      Ptr[i] += i;
+    }
+  }
+}
+
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::nd_range_kernel<3>))
+void ff_3(int *Ptr) {
+  size_t GlobalID =
+      ext::oneapi::this_work_item::get_nd_item<3>().get_global_linear_id();
+  Ptr[GlobalID] = GlobalID;
+}
+
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::nd_range_kernel<3>))
+void ff_4(int *Ptr) {
+  size_t GlobalID =
+      ext::oneapi::this_work_item::get_nd_item<3>().get_global_linear_id();
+  Ptr[GlobalID] *= 2;
+}
+
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::nd_range_kernel<1>))
+void ff_5(int *PtrA, int *PtrB, int *PtrC) {
+  size_t GlobalID =
+      ext::oneapi::this_work_item::get_nd_item<1>().get_global_id();
+  PtrC[GlobalID] += PtrA[GlobalID] * PtrB[GlobalID];
+}
+
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
+void ff_6(int *Ptr, int ScalarValue) {
+  for (size_t i{0}; i < Size; ++i) {
+    Ptr[i] = ScalarValue;
+  }
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_before_finalize.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_before_finalize.cpp
new file mode 100644
index 0000000000000..5d0ac58cf0de9
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_before_finalize.cpp
@@ -0,0 +1,60 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests updating a graph node before finalization
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
+
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter InputParam(Graph, PtrA);
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_0>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, InputParam);
+    cgh.single_task(Kernel);
+  });
+  // Swap PtrB to be the input
+  InputParam.update(PtrB);
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // Only PtrB should be filled with values
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == 0);
+    assert(HostDataB[i] == i);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_multiple_exec_graphs.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_multiple_exec_graphs.cpp
new file mode 100644
index 0000000000000..269a5b5373638
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_multiple_exec_graphs.cpp
@@ -0,0 +1,77 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests creating multiple executable graphs from the same modifiable graph and
+// only updating one of them.
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
+
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter InputParam(Graph, PtrA);
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_1>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, InputParam);
+    cgh.single_task(Kernel);
+  });
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+  auto ExecGraph2 = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // PtrA values should be modified twice
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+  Queue.ext_oneapi_graph(ExecGraph2).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i * 2);
+    assert(HostDataB[i] == 0);
+  }
+
+  // Swap PtrB to be the input
+  InputParam.update(PtrB);
+  // Only update ExecGraph, which should now modify PtrB while ExecGraph2
+  // modifies PtrA still
+  ExecGraph.update(KernelNode);
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+  Queue.ext_oneapi_graph(ExecGraph2).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    // A should have been modified 3 times by now, B only once
+    assert(HostDataA[i] == i * 3);
+    assert(HostDataB[i] == i);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp
new file mode 100644
index 0000000000000..4d6aa6445cd0e
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp
@@ -0,0 +1,80 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests that updating a graph is ordered with respect to previous executions of
+// the graph which may be in flight.
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  // Use a large N to try and make the kernel slow
+  const size_t N = 1 << 16;
+  // Loop inside kernel to make even slower (too large N runs out of memory)
+  const size_t NumKernelLoops = 4;
+  const size_t NumSubmitLoops = 8;
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(N, Queue);
+  int *PtrB = malloc_device<int>(N, Queue);
+
+  std::vector<int> HostDataA(N);
+  std::vector<int> HostDataB(N);
+
+  Queue.memset(PtrA, 0, N * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, N * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter InputParam(Graph, PtrA);
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_2>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, InputParam);
+    cgh.set_arg(1, N);
+    cgh.set_arg(2, NumKernelLoops);
+    cgh.single_task(Kernel);
+  });
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // Submit a bunch of graphs without waiting
+  for (size_t i = 0; i < NumSubmitLoops; i++) {
+    Queue.ext_oneapi_graph(ExecGraph);
+  }
+
+  // Swap PtrB to be the input
+  InputParam.update(PtrB);
+
+  ExecGraph.update(KernelNode);
+
+  // Submit another set of graphs then wait on all submissions
+  for (size_t i = 0; i < NumSubmitLoops; i++) {
+    Queue.ext_oneapi_graph(ExecGraph);
+  }
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrA, HostDataA.data(), N).wait();
+  Queue.copy(PtrB, HostDataB.data(), N).wait();
+  for (size_t i = 0; i < N; i++) {
+    assert(HostDataA[i] == i * NumKernelLoops * NumSubmitLoops);
+    assert(HostDataB[i] == i * NumKernelLoops * NumSubmitLoops);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr.cpp
new file mode 100644
index 0000000000000..64fc1408cee4a
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr.cpp
@@ -0,0 +1,70 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests updating a graph node using index-based explicit update
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
+
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter InputParam(Graph, PtrA);
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_0>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, InputParam);
+    cgh.single_task(Kernel);
+  });
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // PtrA should be filled with values
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i);
+    assert(HostDataB[i] == 0);
+  }
+
+  // Swap PtrB to be the input
+  InputParam.update(PtrB);
+  ExecGraph.update(KernelNode);
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i);
+    assert(HostDataB[i] == i);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_3D.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_3D.cpp
new file mode 100644
index 0000000000000..644478f9ddfb4
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_3D.cpp
@@ -0,0 +1,87 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests updating a 3D ND-Range graph kernel node using index-based explicit
+// update
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  const range<3> GlobalWorkSize(1, 2, 2);
+  const range<3> LocalWorkSize(1, 2, 2);
+  const size_t N = GlobalWorkSize[0] * GlobalWorkSize[1] * GlobalWorkSize[2];
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(N, Queue);
+  int *PtrB = malloc_device<int>(N, Queue);
+
+  std::vector<int> HostDataA(N);
+  std::vector<int> HostDataB(N);
+
+  Queue.memset(PtrA, 0, N * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, N * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter DynParam(Graph, PtrA);
+
+  nd_range<3> NDRange{GlobalWorkSize, LocalWorkSize};
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id_A = exp_ext::get_kernel_id<ff_3>();
+  kernel Kernel_A = Bundle.get_kernel(Kernel_id_A);
+  auto NodeA = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, DynParam);
+    cgh.parallel_for(NDRange, Kernel_A);
+  });
+
+  kernel_id Kernel_id_B = exp_ext::get_kernel_id<ff_4>();
+  kernel Kernel_B = Bundle.get_kernel(Kernel_id_B);
+  auto NodeB = Graph.add(
+      [&](handler &cgh) {
+        cgh.set_arg(0, DynParam);
+        cgh.parallel_for(NDRange, Kernel_B);
+      },
+      exp_ext::property::node::depends_on{NodeA});
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // PtrA should be filled with values
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), N).wait();
+  Queue.copy(PtrB, HostDataB.data(), N).wait();
+  for (size_t i = 0; i < N; i++) {
+    assert(HostDataA[i] == (i * 2));
+    assert(HostDataB[i] == 0);
+  }
+
+  // Swap PtrB to be the input/output
+  DynParam.update(PtrB);
+  ExecGraph.update({NodeA, NodeB});
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), N).wait();
+  Queue.copy(PtrB, HostDataB.data(), N).wait();
+  for (size_t i = 0; i < N; i++) {
+    const size_t Ref = i * 2;
+    assert(HostDataA[i] == Ref);
+    assert(HostDataB[i] == Ref);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_double_update.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_double_update.cpp
new file mode 100644
index 0000000000000..1c66a03eac4d3
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_double_update.cpp
@@ -0,0 +1,81 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests updating a graph node using index-based explicit update
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+  int *PtrUnused = malloc_device<int>(Size, Queue);
+
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
+  std::vector<int> HostDataUnused(Size);
+
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrUnused, 0, Size * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter InputParam(Graph, PtrA);
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_0>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, InputParam);
+    cgh.single_task(Kernel);
+  });
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // PtrA should be filled with values
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  Queue.copy(PtrUnused, HostDataUnused.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i);
+    assert(HostDataB[i] == 0);
+    assert(HostDataUnused[i] == 0);
+  }
+
+  // Swap PtrUnused to be the input, then swap to PtrB without executing
+  InputParam.update(PtrUnused);
+  InputParam.update(PtrB);
+
+  ExecGraph.update(KernelNode);
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  Queue.copy(PtrUnused, HostDataUnused.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i);
+    assert(HostDataB[i] == i);
+    // Check that PtrUnused was never actually used in a kernel
+    assert(HostDataUnused[i] == 0);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+  sycl::free(PtrUnused, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_multiple_nodes.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_multiple_nodes.cpp
new file mode 100644
index 0000000000000..1143b1edf855a
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_multiple_nodes.cpp
@@ -0,0 +1,80 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests updating a single dynamic parameter which is registered with multiple
+// graph nodes
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
+
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter InputParam(Graph, PtrA);
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id_A = exp_ext::get_kernel_id<ff_0>();
+  kernel Kernel_A = Bundle.get_kernel(Kernel_id_A);
+  auto KernelNodeA = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, InputParam);
+    cgh.single_task(Kernel_A);
+  });
+
+  kernel_id Kernel_id_B = exp_ext::get_kernel_id<ff_1>();
+  kernel Kernel_B = Bundle.get_kernel(Kernel_id_B);
+  auto KernelNodeB = Graph.add(
+      [&](handler &cgh) {
+        cgh.set_arg(0, InputParam);
+        cgh.single_task(Kernel_B);
+      },
+      exp_ext::property::node::depends_on{KernelNodeA});
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // PtrA should be filled with values
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i * 2);
+    assert(HostDataB[i] == 0);
+  }
+
+  // Swap PtrB to be the input
+  InputParam.update(PtrB);
+  ExecGraph.update({KernelNodeA, KernelNodeB});
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i * 2);
+    assert(HostDataB[i] == i * 2);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_multiple_params.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_multiple_params.cpp
new file mode 100644
index 0000000000000..76c66a27838e1
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_multiple_params.cpp
@@ -0,0 +1,88 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests updating multiple parameters to a singlegraph node using index-based
+// explicit update
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+  int *PtrC = malloc_device<int>(Size, Queue);
+
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
+  std::vector<int> HostDataC(Size);
+  std::vector<int> OutData(Size);
+
+  std::iota(HostDataA.begin(), HostDataA.end(), 10);
+  std::iota(HostDataB.begin(), HostDataB.end(), 100);
+
+  Queue.memcpy(PtrA, HostDataA.data(), Size * sizeof(int)).wait();
+  Queue.memcpy(PtrB, HostDataB.data(), Size * sizeof(int)).wait();
+  Queue.memset(PtrC, 0, Size * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter ParamA(Graph, PtrA);
+  exp_ext::dynamic_parameter ParamB(Graph, PtrB);
+  exp_ext::dynamic_parameter ParamOut(Graph, PtrC);
+
+  nd_range<1> NDRange{Size, 32};
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_5>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, ParamA);
+    cgh.set_arg(1, ParamB);
+    cgh.set_arg(2, ParamOut);
+    cgh.parallel_for(NDRange, Kernel);
+  });
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // PtrA should be filled with values
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  // Copy to output data to preserve original data for verifying += op
+  Queue.copy(PtrC, OutData.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(OutData[i] == HostDataC[i] + (HostDataA[i] * HostDataB[i]));
+  }
+
+  // Update C's host data
+  HostDataC = OutData;
+
+  // Swap PtrB to be the input
+  ParamOut.update(PtrB);
+  ParamB.update(PtrC);
+
+  ExecGraph.update(KernelNode);
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  // Copy to output data to preserve original data for verifying += op
+  Queue.copy(PtrB, OutData.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(OutData[i] == HostDataB[i] + (HostDataA[i] * HostDataC[i]));
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+  sycl::free(PtrC, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_subgraph.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_subgraph.cpp
new file mode 100644
index 0000000000000..d8b4083fed1e7
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_subgraph.cpp
@@ -0,0 +1,79 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests updating a graph node in an executable graph that was used as a
+// subgraph node in another executable graph is not reflected in the graph
+// containing the subgraph node.
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+  exp_ext::command_graph SubGraph{Ctxt, Queue.get_device()};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
+
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter InputParam(SubGraph, PtrA);
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id SubKernel_id = exp_ext::get_kernel_id<ff_1>();
+  kernel SubKernel = Bundle.get_kernel(SubKernel_id);
+  auto SubKernelNode = SubGraph.add([&](handler &cgh) {
+    cgh.set_arg(0, InputParam);
+    cgh.single_task(SubKernel);
+  });
+
+  auto SubExecGraph = SubGraph.finalize(exp_ext::property::graph::updatable{});
+
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_0>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, PtrA);
+    cgh.single_task(Kernel);
+  });
+
+  Graph.add([&](handler &cgh) { cgh.ext_oneapi_graph(SubExecGraph); },
+            exp_ext::property::node::depends_on{KernelNode});
+
+  // Finalize the parent graph with the original values
+  auto ExecGraph = Graph.finalize();
+
+  // Swap PtrB to be the input
+  InputParam.update(PtrB);
+  // Update the executable graph that was used as a subgraph with the new value,
+  // this should not affect ExecGraph
+  SubExecGraph.update(SubKernelNode);
+  // Only PtrA should be filled with values
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostDataA[i] == i * 2);
+    assert(HostDataB[i] == 0);
+  }
+#endif
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_scalar.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_scalar.cpp
new file mode 100644
index 0000000000000..e69e4097e4b50
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_scalar.cpp
@@ -0,0 +1,65 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// Tests updating a graph node scalar argument using index-based explicit update
+
+#include "../../graph_common.hpp"
+#include "free_function_kernels.hpp"
+
+int main() {
+  queue Queue{};
+  context Ctxt{Queue.get_context()};
+
+  exp_ext::command_graph Graph{Ctxt, Queue.get_device()};
+
+  int *DeviceData = malloc_device<int>(Size, Queue);
+
+  int ScalarValue = 17;
+
+  std::vector<int> HostData(Size);
+
+  Queue.memset(DeviceData, 0, Size * sizeof(int)).wait();
+
+  exp_ext::dynamic_parameter InputParam(Graph, ScalarValue);
+
+#ifndef __SYCL_DEVICE_ONLY__
+  kernel_bundle Bundle = get_kernel_bundle<bundle_state::executable>(Ctxt);
+  kernel_id Kernel_id = exp_ext::get_kernel_id<ff_6>();
+  kernel Kernel = Bundle.get_kernel(Kernel_id);
+  auto KernelNode = Graph.add([&](handler &cgh) {
+    cgh.set_arg(0, DeviceData);
+    cgh.set_arg(1, InputParam);
+    cgh.single_task(Kernel);
+  });
+
+  auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
+
+  // DeviceData should be filled with current ScalarValue (17)
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(DeviceData, HostData.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostData[i] == 17);
+  }
+
+  // Update ScalarValue to be 99 instead
+  InputParam.update(99);
+  ExecGraph.update(KernelNode);
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  Queue.copy(DeviceData, HostData.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
+    assert(HostData[i] == 99);
+  }
+#endif
+  sycl::free(DeviceData, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/graph_common.hpp b/sycl/test-e2e/Graph/graph_common.hpp
index d60049a71cca3..4b5a4c6c9b41f 100644
--- a/sycl/test-e2e/Graph/graph_common.hpp
+++ b/sycl/test-e2e/Graph/graph_common.hpp
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <sycl/detail/core.hpp>
 #include <sycl/usm.hpp>