intel
diff --git a/sycl/test-e2e/BFloat16/bfloat16_example.cpp b/sycl/test-e2e/BFloat16/bfloat16_example.cpp
Lines changed: 4 additions & 69 deletions
diff --git a/sycl/test-e2e/BFloat16/bfloat16_example.hpp b/sycl/test-e2e/BFloat16/bfloat16_example.hpp
Lines changed: 47 additions & 0 deletions
diff --git a/sycl/test-e2e/BFloat16/bfloat16_example_aot.cpp b/sycl/test-e2e/BFloat16/bfloat16_example_aot.cpp
Lines changed: 21 additions & 0 deletions
diff --git a/sycl/test-e2e/BFloat16/bfloat16_example_aot_cpu.cpp b/sycl/test-e2e/BFloat16/bfloat16_example_aot_cpu.cpp
Lines changed: 18 additions & 0 deletions
diff --git a/sycl/test-e2e/BFloat16/bfloat16_example_aot_gpu.cpp b/sycl/test-e2e/BFloat16/bfloat16_example_aot_gpu.cpp
Lines changed: 18 additions & 0 deletions
diff --git a/sycl/test-e2e/ESIMD/ext_math_ieee_sqrt_div.cpp b/sycl/test-e2e/ESIMD/ext_math_ieee_sqrt_div.cpp
Lines changed: 1 addition & 1 deletion
@@ -1,80 +1,15 @@
 ///
-/// Check if bfloat16 example works using fallback libraries
+/// Checks a simple case of bfloat16, also employed for AOT library fallback.
 ///
 
-// REQUIRES: opencl-aot, ocloc, gpu-intel-gen9
-
 // CUDA is not compatible with SPIR.
 // UNSUPPORTED: cuda
 
 // RUN: %clangxx -fsycl %s -o %t.out
 // RUN: %{run} %t.out
 
-// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -o %t.out
-// RUN: %{run} %t.out
-
-// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out
-// RUN: %{run} %t.out
-
-// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out
-// RUN: %if gpu %{ %{run} %t.out %}
-
-// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
-// RUN: %{run} %t.out
-
-// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
-// RUN: %{run} %t.out
-
-// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
-// RUN: %if cpu %{ %{run} %t.out %}
-
-// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
-// RUN: %if cpu %{ %{run} %t.out %}
-
-#include <sycl/detail/core.hpp>
-#include <sycl/ext/oneapi/bfloat16.hpp>
-
-using namespace sycl;
-using sycl::ext::oneapi::bfloat16;
-
-float foo(float a, float b) {
-  // Convert from float to bfloat16.
-  bfloat16 A{a};
-  bfloat16 B{b};
-
-  // Convert A and B from bfloat16 to float, do addition on floating-point
-  // numbers, then convert the result to bfloat16 and store it in C.
-  bfloat16 C = A + B;
-
-  // Return the result converted from bfloat16 to float.
-  return C;
-}
-
-int main(int argc, char *argv[]) {
-  float data[3] = {7.0f, 8.1f, 0.0f};
-
-  float result_host = foo(7.0f, 8.1f);
-  std::cout << "CPU Result = " << result_host << std::endl;
-  if (std::abs(15.1f - result_host) > 0.1f) {
-    std::cout << "Test failed. Expected CPU Result ~= 15.1" << std::endl;
-    return 1;
-  }
-
-  queue deviceQueue;
-  buffer<float, 1> buf{data, 3};
-
-  deviceQueue.submit([&](handler &cgh) {
-    accessor numbers{buf, cgh, read_write};
-    cgh.single_task([=]() { numbers[2] = foo(numbers[0], numbers[1]); });
-  });
-
-  host_accessor hostOutAcc{buf, read_only};
-  float result_device = hostOutAcc[2];
-  std::cout << "GPU Result = " << result_device << std::endl;
-  if (std::abs(result_host - result_device) > 0.1f) {
-    std::cout << "Test failed. CPU Result !~= GPU result" << std::endl;
-    return 1;
-  }
+#include "bfloat16_example.hpp"
 
-  return 0;
+int main() {
+  return runTest(); // Exit status is runTest()'s result: 0 pass, 1 fail.
 }
@@ -0,0 +1,47 @@
+#include <sycl/detail/core.hpp>
+#include <sycl/ext/oneapi/bfloat16.hpp>
+
+using namespace sycl;
+using sycl::ext::oneapi::bfloat16;
+
+// Converts a and b to bfloat16, adds them, and returns the sum as a float.
+inline float foo(float a, float b) { // inline: defined in a shared header.
+  // Convert from float to bfloat16.
+  bfloat16 A{a};
+  bfloat16 B{b};
+  // Convert A and B from bfloat16 to float, do addition on floating-point
+  // numbers, then convert the result to bfloat16 and store it in C.
+  bfloat16 C = A + B;
+
+  // Return the result converted from bfloat16 to float.
+  return C;
+}
+
+// Checks foo on the host and in a device kernel; returns 0 on success, else 1.
+inline int runTest() { // inline: defined in a header shared by several tests.
+  // data[0] and data[1] are the kernel inputs; data[2] receives its result.
+  float data[3] = {7.0f, 8.1f, 0.0f};
+  float result_host = foo(7.0f, 8.1f);
+  std::cout << "Host Result = " << result_host << std::endl;
+  if (std::abs(15.1f - result_host) > 0.1f) {
+    std::cout << "Test failed. Expected Host Result ~= 15.1" << std::endl;
+    return 1;
+  }
+
+  queue deviceQueue;
+  buffer<float, 1> buf{data, 3};
+  deviceQueue.submit([&](handler &cgh) {
+    accessor numbers{buf, cgh, read_write};
+    cgh.single_task([=]() { numbers[2] = foo(numbers[0], numbers[1]); });
+  });
+
+  // Read the kernel's output back on the host and compare with the host sum.
+  host_accessor hostOutAcc{buf, read_only};
+  float result_device = hostOutAcc[2];
+  std::cout << "Device Result = " << result_device << std::endl;
+  if (std::abs(result_host - result_device) > 0.1f) {
+    std::cout << "Test failed. Host Result !~= Device result" << std::endl;
+    return 1;
+  }
+  return 0;
+}
@@ -0,0 +1,21 @@
+///
+/// Check if bfloat16 example works using fallback libraries AOT compiled for
+/// both GPU and CPU.
+///
+
+// REQUIRES: opencl-aot, ocloc, gpu-intel-gen12, any-device-is-cpu
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -o %t.out
+// RUN: %{run} %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen12lp" %s -o %t.out
+// RUN: %{run} %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen12lp" %s -o %t.out
+// RUN: %{run} %t.out
+
+#include "bfloat16_example.hpp"
+
+int main() {
+  return runTest(); // Exit status is runTest()'s result: 0 pass, 1 fail.
+}
@@ -0,0 +1,18 @@
+///
+/// Check if bfloat16 example works using fallback libraries on CPU when the
+/// binary also carries an AOT-compiled GPU (spir64_gen) image.
+///
+
+// REQUIRES: opencl-aot, ocloc, gpu-intel-gen12, any-device-is-cpu
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device dg1" %s -o %t.out
+// RUN: %if cpu %{ %{run} %t.out %}
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device dg1" %s -o %t.out
+// RUN: %if cpu %{ %{run} %t.out %}
+
+#include "bfloat16_example.hpp"
+
+int main() {
+  return runTest(); // Exit status is runTest()'s result: 0 pass, 1 fail.
+}
@@ -0,0 +1,18 @@
+///
+/// Check if bfloat16 example works using fallback libraries AOT compiled for
+/// GPU.
+///
+
+// REQUIRES: opencl-aot, ocloc, gpu-intel-gen12, any-device-is-gpu
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen12lp" %s -o %t.out
+// RUN: %if gpu %{%{run} %t.out %}
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out
+// RUN: %if gpu %{%{run} %t.out %}
+
+#include "bfloat16_example.hpp"
+
+int main() {
+  return runTest(); // Exit status is runTest()'s result: 0 pass, 1 fail.
+}
@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu-intel-gen9 || arch-intel_gpu_pvc
+// REQUIRES: arch-intel_gpu_pvc
 
 // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
 // RUN: %{build} -fsycl-device-code-split=per_kernel %{mathflags} -o %t.out