intel
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/free_function_kernels.hpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/free_function_kernels.hpp
Lines changed: 7 additions & 9 deletions
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_before_finalize.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_before_finalize.cpp
Lines changed: 13 additions & 14 deletions
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_multiple_exec_graphs.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_multiple_exec_graphs.cpp
Lines changed: 16 additions & 17 deletions
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp
Lines changed: 4 additions & 2 deletions
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr.cpp
Lines changed: 16 additions & 17 deletions
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_3D.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_3D.cpp
Lines changed: 4 additions & 2 deletions
diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_double_update.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_double_update.cpp
Lines changed: 22 additions & 22 deletions
@@ -1,20 +1,18 @@
+#include "../../graph_common.hpp"
 #include "sycl/ext/oneapi/kernel_properties/properties.hpp"
 #include "sycl/kernel_bundle.hpp"
 #include <sycl/ext/oneapi/free_function_queries.hpp>
 
-namespace exp_ext = sycl::ext::oneapi::experimental;
-using namespace sycl;
-
 SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
-void ff_0(int *ptr, size_t size) {
-  for (size_t i{0}; i < size; ++i) {
+void ff_0(int *ptr) {
+  for (size_t i{0}; i < Size; ++i) {
     ptr[i] = i;
   }
 }
 
 SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
-void ff_1(int *ptr, size_t size) {
-  for (size_t i{0}; i < size; ++i) {
+void ff_1(int *ptr) {
+  for (size_t i{0}; i < Size; ++i) {
     ptr[i] += i;
   }
 }
@@ -49,8 +47,8 @@ void ff_5(int *ptrA, int *ptrB, int *ptrC) {
 }
 
 SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))
-void ff_6(int *ptr, int scalarValue, size_t size) {
-  for (size_t i{0}; i < size; ++i) {
+void ff_6(int *ptr, int scalarValue) {
+  for (size_t i{0}; i < Size; ++i) {
     ptr[i] = scalarValue;
   }
 }
@@ -5,8 +5,8 @@
 // Extra run to check for immediate-command-list in Level Zero
 // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
 //
-// The name mangling for free function kernels currently does not work with PTX.
-// UNSUPPORTED: cuda
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
 
 // Tests updating a graph node before finalization
 
@@ -17,18 +17,16 @@ int main() {
   queue Queue{};
   context ctxt{Queue.get_context()};
 
-  const size_t N = 1024;
-
   exp_ext::command_graph Graph{ctxt, Queue.get_device()};
 
-  int *PtrA = malloc_device<int>(N, Queue);
-  int *PtrB = malloc_device<int>(N, Queue);
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
 
-  std::vector<int> HostDataA(N);
-  std::vector<int> HostDataB(N);
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
 
-  Queue.memset(PtrA, 0, N * sizeof(int)).wait();
-  Queue.memset(PtrB, 0, N * sizeof(int)).wait();
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
 
   exp_ext::dynamic_parameter InputParam(Graph, PtrA);
 
@@ -38,7 +36,6 @@ int main() {
   kernel Kernel = Bundle.get_kernel(Kernel_id);
   auto KernelNode = Graph.add([&](handler &cgh) {
     cgh.set_arg(0, InputParam);
-    cgh.set_arg(1, N);
     cgh.single_task(Kernel);
   });
   // Swap PtrB to be the input
@@ -49,12 +46,14 @@ int main() {
   // Only PtrB should be filled with values
   Queue.ext_oneapi_graph(ExecGraph).wait();
 
-  Queue.copy(PtrA, HostDataA.data(), N).wait();
-  Queue.copy(PtrB, HostDataB.data(), N).wait();
-  for (size_t i = 0; i < N; i++) {
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
     assert(HostDataA[i] == 0);
     assert(HostDataB[i] == i);
   }
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
 #endif
   return 0;
 }
@@ -5,8 +5,8 @@
 // Extra run to check for immediate-command-list in Level Zero
 // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
 //
-// The name mangling for free function kernels currently does not work with PTX.
-// UNSUPPORTED: cuda
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
 
 // Tests creating multiple executable graphs from the same modifiable graph and
 // only updating one of them.
@@ -18,18 +18,16 @@ int main() {
   queue Queue{};
   context ctxt{Queue.get_context()};
 
-  const size_t N = 1024;
-
   exp_ext::command_graph Graph{ctxt, Queue.get_device()};
 
-  int *PtrA = malloc_device<int>(N, Queue);
-  int *PtrB = malloc_device<int>(N, Queue);
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
 
-  std::vector<int> HostDataA(N);
-  std::vector<int> HostDataB(N);
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
 
-  Queue.memset(PtrA, 0, N * sizeof(int)).wait();
-  Queue.memset(PtrB, 0, N * sizeof(int)).wait();
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
 
   exp_ext::dynamic_parameter InputParam(Graph, PtrA);
 
@@ -39,7 +37,6 @@ int main() {
   kernel Kernel = Bundle.get_kernel(Kernel_id);
   auto KernelNode = Graph.add([&](handler &cgh) {
     cgh.set_arg(0, InputParam);
-    cgh.set_arg(1, N);
     cgh.single_task(Kernel);
   });
 
@@ -50,9 +47,9 @@ int main() {
   Queue.ext_oneapi_graph(ExecGraph).wait();
   Queue.ext_oneapi_graph(ExecGraph2).wait();
 
-  Queue.copy(PtrA, HostDataA.data(), N).wait();
-  Queue.copy(PtrB, HostDataB.data(), N).wait();
-  for (size_t i = 0; i < N; i++) {
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
     assert(HostDataA[i] == i * 2);
     assert(HostDataB[i] == 0);
   }
@@ -65,13 +62,15 @@ int main() {
   Queue.ext_oneapi_graph(ExecGraph).wait();
   Queue.ext_oneapi_graph(ExecGraph2).wait();
 
-  Queue.copy(PtrA, HostDataA.data(), N).wait();
-  Queue.copy(PtrB, HostDataB.data(), N).wait();
-  for (size_t i = 0; i < N; i++) {
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
     // A should have been modified 3 times by now, B only once
     assert(HostDataA[i] == i * 3);
     assert(HostDataB[i] == i);
   }
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
 #endif
   return 0;
 }
@@ -5,8 +5,8 @@
 // Extra run to check for immediate-command-list in Level Zero
 // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
 //
-// The name mangling for free function kernels currently does not work with PTX.
-// UNSUPPORTED: cuda
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
 
 // Tests that updating a graph is ordered with respect to previous executions of
 // the graph which may be in flight.
@@ -72,6 +72,8 @@ int main() {
     assert(HostDataA[i] == i * NumKernelLoops * NumSubmitLoops);
     assert(HostDataB[i] == i * NumKernelLoops * NumSubmitLoops);
   }
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
 #endif
   return 0;
 }
@@ -5,8 +5,8 @@
 // Extra run to check for immediate-command-list in Level Zero
 // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
 //
-// The name mangling for free function kernels currently does not work with PTX.
-// UNSUPPORTED: cuda
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
 
 // Tests updating a graph node using index-based explicit update
 
@@ -17,18 +17,16 @@ int main() {
   queue Queue{};
   context ctxt{Queue.get_context()};
 
-  const size_t N = 1024;
-
   exp_ext::command_graph Graph{ctxt, Queue.get_device()};
 
-  int *PtrA = malloc_device<int>(N, Queue);
-  int *PtrB = malloc_device<int>(N, Queue);
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
 
-  std::vector<int> HostDataA(N);
-  std::vector<int> HostDataB(N);
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
 
-  Queue.memset(PtrA, 0, N * sizeof(int)).wait();
-  Queue.memset(PtrB, 0, N * sizeof(int)).wait();
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
 
   exp_ext::dynamic_parameter InputParam(Graph, PtrA);
 
@@ -38,7 +36,6 @@ int main() {
   kernel Kernel = Bundle.get_kernel(Kernel_id);
   auto KernelNode = Graph.add([&](handler &cgh) {
     cgh.set_arg(0, InputParam);
-    cgh.set_arg(1, N);
     cgh.single_task(Kernel);
   });
 
@@ -47,9 +44,9 @@ int main() {
   // PtrA should be filled with values
   Queue.ext_oneapi_graph(ExecGraph).wait();
 
-  Queue.copy(PtrA, HostDataA.data(), N).wait();
-  Queue.copy(PtrB, HostDataB.data(), N).wait();
-  for (size_t i = 0; i < N; i++) {
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
     assert(HostDataA[i] == i);
     assert(HostDataB[i] == 0);
   }
@@ -59,12 +56,14 @@ int main() {
   ExecGraph.update(KernelNode);
   Queue.ext_oneapi_graph(ExecGraph).wait();
 
-  Queue.copy(PtrA, HostDataA.data(), N).wait();
-  Queue.copy(PtrB, HostDataB.data(), N).wait();
-  for (size_t i = 0; i < N; i++) {
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
     assert(HostDataA[i] == i);
     assert(HostDataB[i] == i);
   }
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
 #endif
   return 0;
 }
@@ -5,8 +5,8 @@
 // Extra run to check for immediate-command-list in Level Zero
 // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
 //
-// The name mangling for free function kernels currently does not work with PTX.
-// UNSUPPORTED: cuda
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
 
 // Tests updating a 3D ND-Range graph kernel node using index-based explicit
 // update
@@ -79,6 +79,8 @@ int main() {
     assert(HostDataA[i] == Ref);
     assert(HostDataB[i] == Ref);
   }
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
 #endif
   return 0;
 }
@@ -5,8 +5,8 @@
 // Extra run to check for immediate-command-list in Level Zero
 // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
 //
-// The name mangling for free function kernels currently does not work with PTX.
-// UNSUPPORTED: cuda
+// XFAIL: cuda
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
 
 // Tests updating a graph node using index-based explicit update
 
@@ -17,21 +17,19 @@ int main() {
   queue Queue{};
   context ctxt{Queue.get_context()};
 
-  const size_t N = 1024;
-
   exp_ext::command_graph Graph{ctxt, Queue.get_device()};
 
-  int *PtrA = malloc_device<int>(N, Queue);
-  int *PtrB = malloc_device<int>(N, Queue);
-  int *PtrUnused = malloc_device<int>(N, Queue);
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+  int *PtrUnused = malloc_device<int>(Size, Queue);
 
-  std::vector<int> HostDataA(N);
-  std::vector<int> HostDataB(N);
-  std::vector<int> HostDataUnused(N);
+  std::vector<int> HostDataA(Size);
+  std::vector<int> HostDataB(Size);
+  std::vector<int> HostDataUnused(Size);
 
-  Queue.memset(PtrA, 0, N * sizeof(int)).wait();
-  Queue.memset(PtrB, 0, N * sizeof(int)).wait();
-  Queue.memset(PtrUnused, 0, N * sizeof(int)).wait();
+  Queue.memset(PtrA, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrB, 0, Size * sizeof(int)).wait();
+  Queue.memset(PtrUnused, 0, Size * sizeof(int)).wait();
 
   exp_ext::dynamic_parameter InputParam(Graph, PtrA);
 
@@ -41,7 +39,6 @@ int main() {
   kernel Kernel = Bundle.get_kernel(Kernel_id);
   auto KernelNode = Graph.add([&](handler &cgh) {
     cgh.set_arg(0, InputParam);
-    cgh.set_arg(1, N);
     cgh.single_task(Kernel);
   });
 
@@ -50,10 +47,10 @@ int main() {
   // PtrA should be filled with values
   Queue.ext_oneapi_graph(ExecGraph).wait();
 
-  Queue.copy(PtrA, HostDataA.data(), N).wait();
-  Queue.copy(PtrB, HostDataB.data(), N).wait();
-  Queue.copy(PtrUnused, HostDataUnused.data(), N).wait();
-  for (size_t i = 0; i < N; i++) {
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  Queue.copy(PtrUnused, HostDataUnused.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
     assert(HostDataA[i] == i);
     assert(HostDataB[i] == 0);
     assert(HostDataUnused[i] == 0);
@@ -66,15 +63,18 @@ int main() {
   ExecGraph.update(KernelNode);
   Queue.ext_oneapi_graph(ExecGraph).wait();
 
-  Queue.copy(PtrA, HostDataA.data(), N).wait();
-  Queue.copy(PtrB, HostDataB.data(), N).wait();
-  Queue.copy(PtrUnused, HostDataUnused.data(), N).wait();
-  for (size_t i = 0; i < N; i++) {
+  Queue.copy(PtrA, HostDataA.data(), Size).wait();
+  Queue.copy(PtrB, HostDataB.data(), Size).wait();
+  Queue.copy(PtrUnused, HostDataUnused.data(), Size).wait();
+  for (size_t i = 0; i < Size; i++) {
     assert(HostDataA[i] == i);
     assert(HostDataB[i] == i);
     // Check that PtrUnused was never actually used in a kernel
     assert(HostDataUnused[i] == 0);
   }
+  sycl::free(PtrA, Queue);
+  sycl::free(PtrB, Queue);
+  sycl::free(PtrUnused, Queue);
 #endif
   return 0;
 }
Original file line number	Diff line number	Diff line change
`@@ -1,20 +1,18 @@`
	`1`	`+#include "../../graph_common.hpp"`
`1`	`2`	`#include "sycl/ext/oneapi/kernel_properties/properties.hpp"`
`2`	`3`	`#include "sycl/kernel_bundle.hpp"`
`3`	`4`	`#include <sycl/ext/oneapi/free_function_queries.hpp>`
`4`	`5`
`5`		`-namespace exp_ext = sycl::ext::oneapi::experimental;`
`6`		`-using namespace sycl;`
`7`		`-`
`8`	`6`	`SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))`
`9`		`-void ff_0(int *ptr, size_t size) {`
`10`		`- for (size_t i{0}; i < size; ++i) {`
	`7`	`+void ff_0(int *ptr) {`
	`8`	`+ for (size_t i{0}; i < Size; ++i) {`
`11`	`9`	`ptr[i] = i;`
`12`	`10`	`}`
`13`	`11`	`}`
`14`	`12`
`15`	`13`	`SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))`
`16`		`-void ff_1(int *ptr, size_t size) {`
`17`		`- for (size_t i{0}; i < size; ++i) {`
	`14`	`+void ff_1(int *ptr) {`
	`15`	`+ for (size_t i{0}; i < Size; ++i) {`
`18`	`16`	`ptr[i] += i;`
`19`	`17`	`}`
`20`	`18`	`}`
`@@ -49,8 +47,8 @@ void ff_5(int *ptrA, int *ptrB, int *ptrC) {`
`49`	`47`	`}`
`50`	`48`
`51`	`49`	`SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((exp_ext::single_task_kernel))`
`52`		`-void ff_6(int *ptr, int scalarValue, size_t size) {`
`53`		`- for (size_t i{0}; i < size; ++i) {`
	`50`	`+void ff_6(int *ptr, int scalarValue) {`
	`51`	`+ for (size_t i{0}; i < Size; ++i) {`
`54`	`52`	`ptr[i] = scalarValue;`
`55`	`53`	`}`
`56`	`54`	`}`