intel
diff --git a/‎sycl/include/sycl/accessor_image.hpp‎
Lines changed: 29 additions & 30 deletions b/‎sycl/include/sycl/accessor_image.hpp‎
Lines changed: 29 additions & 30 deletions
diff --git a/‎sycl/include/sycl/detail/generic_type_lists.hpp‎
Lines changed: 0 additions & 2 deletions b/‎sycl/include/sycl/detail/generic_type_lists.hpp‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎sycl/include/sycl/detail/generic_type_traits.hpp‎
Lines changed: 7 additions & 20 deletions b/‎sycl/include/sycl/detail/generic_type_traits.hpp‎
Lines changed: 7 additions & 20 deletions
diff --git a/‎sycl/include/sycl/vector.hpp‎
Lines changed: 13 additions & 12 deletions b/‎sycl/include/sycl/vector.hpp‎
Lines changed: 13 additions & 12 deletions
diff --git a/‎sycl/source/detail/device_info.hpp‎
Lines changed: 82 additions & 0 deletions b/‎sycl/source/detail/device_info.hpp‎
Lines changed: 82 additions & 0 deletions
diff --git a/‎sycl/test-e2e/Matrix/common.hpp‎
Lines changed: 5 additions & 3 deletions b/‎sycl/test-e2e/Matrix/common.hpp‎
Lines changed: 5 additions & 3 deletions
@@ -12,20 +12,21 @@
 namespace sycl {
 inline namespace _V1 {
 namespace detail {
-template <int Dim, typename T> struct IsValidCoordDataT;
-template <typename T> struct IsValidCoordDataT<1, T> {
-  constexpr static bool value = detail::is_contained<
-      T, detail::type_list<opencl::cl_int, opencl::cl_float>>::type::value;
+template <int Dim, typename T, bool AllowFP = true> struct IsValidCoordDataT;
+template <typename T, bool AllowFP> struct IsValidCoordDataT<1, T, AllowFP> {
+  constexpr static bool value =
+      std::is_same_v<T, opencl::cl_int> ||
+      (AllowFP && std::is_same_v<T, opencl::cl_float>);
 };
-template <typename T> struct IsValidCoordDataT<2, T> {
-  constexpr static bool value = detail::is_contained<
-      T, detail::type_list<vec<opencl::cl_int, 2>,
-                           vec<opencl::cl_float, 2>>>::type::value;
+template <typename T, bool AllowFP> struct IsValidCoordDataT<2, T, AllowFP> {
+  constexpr static bool value =
+      std::is_same_v<T, vec<opencl::cl_int, 2>> ||
+      (AllowFP && std::is_same_v<T, vec<opencl::cl_float, 2>>);
 };
-template <typename T> struct IsValidCoordDataT<3, T> {
-  constexpr static bool value = detail::is_contained<
-      T, detail::type_list<vec<opencl::cl_int, 4>,
-                           vec<opencl::cl_float, 4>>>::type::value;
+template <typename T, bool AllowFP> struct IsValidCoordDataT<3, T, AllowFP> {
+  constexpr static bool value =
+      std::is_same_v<T, vec<opencl::cl_int, 4>> ||
+      (AllowFP && std::is_same_v<T, vec<opencl::cl_float, 4>>);
 };
 
 template <int Dim, typename T> struct IsValidUnsampledCoord2020DataT;
@@ -448,12 +449,12 @@ class image_accessor
   // (accessTarget == access::target::image && accessMode == access::mode::read)
   // || (accessTarget == access::target::host_image && ( accessMode ==
   // access::mode::read || accessMode == access::mode::read_write))
-  template <typename CoordT, int Dims = Dimensions,
-            typename = std::enable_if_t<
-                (Dims > 0) && (IsValidCoordDataT<Dims, CoordT>::value) &&
-                (detail::is_genint_v<CoordT>) &&
-                ((IsImageAcc && IsImageAccessReadOnly) ||
-                 (IsHostImageAcc && IsImageAccessAnyRead))>>
+  template <
+      typename CoordT, int Dims = Dimensions,
+      typename = std::enable_if_t<
+          (IsValidCoordDataT<Dims, CoordT, /* AllowFP = */ false>::value) &&
+          ((IsImageAcc && IsImageAccessReadOnly) ||
+           (IsHostImageAcc && IsImageAccessAnyRead))>>
   DataT read(const CoordT &Coords) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return __invoke__ImageRead<DataT, OCLImageTy, CoordT>(MImageObj, Coords);
@@ -470,7 +471,7 @@ class image_accessor
   // access::mode::read || accessMode == access::mode::read_write))
   template <typename CoordT, int Dims = Dimensions,
             typename = std::enable_if_t<
-                (Dims > 0) && (IsValidCoordDataT<Dims, CoordT>::value) &&
+                (IsValidCoordDataT<Dims, CoordT>::value) &&
                 ((IsImageAcc && IsImageAccessReadOnly) ||
                  (IsHostImageAcc && IsImageAccessAnyRead))>>
   DataT read(const CoordT &Coords, const sampler &Smpl) const {
@@ -494,10 +495,10 @@ class image_accessor
   // accessMode == access::mode::read_write))
   template <
       typename CoordT, int Dims = Dimensions,
-      typename = std::enable_if_t<(Dims > 0) && (detail::is_genint_v<CoordT>) &&
-                                  (IsValidCoordDataT<Dims, CoordT>::value) &&
-                                  ((IsImageAcc && IsImageAccessWriteOnly) ||
-                                   (IsHostImageAcc && IsImageAccessAnyWrite))>>
+      typename = std::enable_if_t<
+          (IsValidCoordDataT<Dims, CoordT, /* AllowFP = */ false>::value) &&
+          ((IsImageAcc && IsImageAccessWriteOnly) ||
+           (IsHostImageAcc && IsImageAccessAnyWrite))>>
   void write(const CoordT &Coords, const DataT &Color) const {
 #ifdef __SYCL_DEVICE_ONLY__
     __invoke__ImageWrite<OCLImageTy, CoordT, DataT>(MImageObj, Coords, Color);
@@ -546,23 +547,21 @@ class __image_array_slice__ {
       size_t Idx)
       : MBaseAcc(BaseAcc), MIdx(Idx) {}
 
-  template <typename CoordT, int Dims = Dimensions,
-            typename = std::enable_if_t<
-                (Dims > 0) && (IsValidCoordDataT<Dims, CoordT>::value)>>
+  template <
+      typename CoordT, int Dims = Dimensions,
+      typename = std::enable_if_t<(IsValidCoordDataT<Dims, CoordT>::value)>>
   DataT read(const CoordT &Coords) const {
     return MBaseAcc.read(getAdjustedCoords(Coords));
   }
 
   template <typename CoordT, int Dims = Dimensions,
-            typename = std::enable_if_t<(Dims > 0) &&
-                                        IsValidCoordDataT<Dims, CoordT>::value>>
+            typename = std::enable_if_t<IsValidCoordDataT<Dims, CoordT>::value>>
   DataT read(const CoordT &Coords, const sampler &Smpl) const {
     return MBaseAcc.read(getAdjustedCoords(Coords), Smpl);
   }
 
   template <typename CoordT, int Dims = Dimensions,
-            typename = std::enable_if_t<(Dims > 0) &&
-                                        IsValidCoordDataT<Dims, CoordT>::value>>
+            typename = std::enable_if_t<IsValidCoordDataT<Dims, CoordT>::value>>
   void write(const CoordT &Coords, const DataT &Color) const {
     return MBaseAcc.write(getAdjustedCoords(Coords), Color);
   }
 
@@ -70,8 +70,6 @@ using scalar_vector_bfloat16_list =
 using bfloat16_list =
     tl_append<scalar_bfloat16_list, vector_bfloat16_list, marray_bfloat16_list>;
 
-using half_bfloat16_list = tl_append<scalar_half_list, scalar_bfloat16_list>;
-
 using scalar_float_list = type_list<float>;
 
 using vector_float_list =
 
@@ -31,17 +31,6 @@ template <typename T>
 inline constexpr bool is_svgenfloatf_v =
     is_contained_v<T, gtl::scalar_vector_float_list>;
 
-template <typename T>
-inline constexpr bool is_half_v = is_contained_v<T, gtl::scalar_half_list>;
-
-template <typename T>
-inline constexpr bool is_bfloat16_v =
-    is_contained_v<T, gtl::scalar_bfloat16_list>;
-
-template <typename T>
-inline constexpr bool is_half_or_bf16_v =
-    is_contained_v<T, gtl::half_bfloat16_list>;
-
 template <typename T>
 inline constexpr bool is_svgenfloath_v =
     is_contained_v<T, gtl::scalar_vector_half_list>;
@@ -57,9 +46,6 @@ template <typename T>
 inline constexpr bool is_vgenfloat_v =
     is_contained_v<T, gtl::vector_floating_list>;
 
-template <typename T>
-inline constexpr bool is_genint_v = is_contained_v<T, gtl::signed_int_list>;
-
 template <typename T>
 inline constexpr bool is_geninteger_v = is_contained_v<T, gtl::integer_list>;
 
@@ -141,10 +127,11 @@ template <typename T> auto convertToOpenCLType(T &&x) {
     // sycl::half may convert to _Float16, and we would try to instantiate
     // vec class with _Float16 DataType, which is not expected there. As
     // such, leave vector<half, N> as-is.
-    using MatchingVec = vec<std::conditional_t<is_half_v<ElemTy>, ElemTy,
-                                               decltype(convertToOpenCLType(
-                                                   std::declval<ElemTy>()))>,
-                            no_ref::size()>;
+    using MatchingVec =
+        vec<std::conditional_t<std::is_same_v<ElemTy, half>, ElemTy,
+                               decltype(convertToOpenCLType(
+                                   std::declval<ElemTy>()))>,
+            no_ref::size()>;
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::bit_cast<typename MatchingVec::vector_t>(x);
 #else
@@ -160,11 +147,11 @@ template <typename T> auto convertToOpenCLType(T &&x) {
                                           fixed_width_unsigned<sizeof(no_ref)>>;
     static_assert(sizeof(OpenCLType) == sizeof(T));
     return static_cast<OpenCLType>(x);
-  } else if constexpr (is_half_v<no_ref>) {
+  } else if constexpr (std::is_same_v<no_ref, half>) {
     using OpenCLType = sycl::detail::half_impl::BIsRepresentationT;
     static_assert(sizeof(OpenCLType) == sizeof(T));
     return static_cast<OpenCLType>(x);
-  } else if constexpr (is_bfloat16_v<no_ref>) {
+  } else if constexpr (std::is_same_v<no_ref, ext::oneapi::bfloat16>) {
     // On host, don't interpret BF16 as uint16.
 #ifdef __SYCL_DEVICE_ONLY__
     using OpenCLType = sycl::ext::oneapi::detail::Bfloat16StorageT;
 
@@ -119,6 +119,11 @@ struct ScalarConversionOperatorMixIn<Vec, T, N, std::enable_if_t<N == 1>> {
   operator T() const { return (*static_cast<const Vec *>(this))[0]; }
 };
 
+template <typename T>
+inline constexpr bool is_fundamental_or_half_or_bfloat16 =
+    std::is_fundamental_v<T> || std::is_same_v<std::remove_const_t<T>, half> ||
+    std::is_same_v<std::remove_const_t<T>, ext::oneapi::bfloat16>;
+
 } // namespace detail
 
 ///////////////////////// class sycl::vec /////////////////////////
@@ -288,10 +293,8 @@ class __SYCL_EBO vec
   // when NumElements == 1. The template prevents implicit conversion from
   // vec<_, 1> to DataT.
   template <typename Ty = DataT>
-  typename std::enable_if_t<
-      std::is_fundamental_v<Ty> ||
-          detail::is_half_or_bf16_v<typename std::remove_const_t<Ty>>,
-      vec &>
+  typename std::enable_if_t<detail::is_fundamental_or_half_or_bfloat16<Ty>,
+                            vec &>
   operator=(const DataT &Rhs) {
     *this = vec{Rhs};
     return *this;
@@ -626,16 +629,14 @@ class SwizzleOp {
       1 != IdxNum && SwizzleOp::getNumElements() == IdxNum, T>;
 
   template <typename T>
-  using EnableIfScalarType = typename std::enable_if_t<
-      std::is_convertible_v<DataT, T> &&
-      (std::is_fundamental_v<T> ||
-       detail::is_half_or_bf16_v<typename std::remove_const_t<T>>)>;
+  using EnableIfScalarType =
+      typename std::enable_if_t<std::is_convertible_v<DataT, T> &&
+                                detail::is_fundamental_or_half_or_bfloat16<T>>;
 
   template <typename T>
-  using EnableIfNoScalarType = typename std::enable_if_t<
-      !std::is_convertible_v<DataT, T> ||
-      !(std::is_fundamental_v<T> ||
-        detail::is_half_or_bf16_v<typename std::remove_const_t<T>>)>;
+  using EnableIfNoScalarType =
+      typename std::enable_if_t<!std::is_convertible_v<DataT, T> ||
+                                !detail::is_fundamental_or_half_or_bfloat16<T>>;
 
   template <int... Indices>
   using Swizzle =
 
@@ -850,14 +850,96 @@ struct get_device_info_impl<
            matrix_type::sint32, matrix_type::sint32},
           {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
            matrix_type::fp32, matrix_type::fp32},
+          {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp32},
+          {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp16},
+          {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp16},
+          {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp16},
+          {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp16},
+          {0, 0, 0, 1, 64, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 1, 64, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp32},
+          {0, 0, 0, 1, 64, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp16},
+          {0, 0, 0, 1, 64, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp16},
+          {0, 0, 0, 32, 64, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 32, 64, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp32},
+          {0, 0, 0, 32, 64, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::bf16},
+          {0, 0, 0, 32, 64, 16, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp16},
+          {0, 0, 0, 1, 64, 32, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 1, 64, 32, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp32},
+          {0, 0, 0, 1, 64, 32, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp16},
+          {0, 0, 0, 1, 64, 32, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp16},
+          {0, 0, 0, 32, 64, 32, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 32, 64, 32, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp32},
+          {0, 0, 0, 32, 64, 32, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp16},
+          {0, 0, 0, 32, 64, 32, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp16, matrix_type::fp16},
+          {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::bf16},
+          {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::bf16},
+          {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::fp32},
           {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
            matrix_type::fp32, matrix_type::fp32},
           {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
            matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::fp32},
+          {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::bf16},
+          {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::bf16},
           {0, 0, 0, 1, 64, 16, matrix_type::bf16, matrix_type::bf16,
            matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 1, 64, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::fp32},
+          {0, 0, 0, 1, 64, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::bf16},
+          {0, 0, 0, 1, 64, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::bf16},
           {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
            matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::fp32},
+          {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::bf16},
+          {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::bf16},
+          {0, 0, 0, 1, 64, 32, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 1, 64, 32, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::fp32},
+          {0, 0, 0, 1, 64, 32, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::bf16},
+          {0, 0, 0, 1, 64, 32, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::bf16},
+          {0, 0, 0, 32, 64, 32, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::fp32},
+          {0, 0, 0, 32, 64, 32, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::fp32},
+          {0, 0, 0, 32, 64, 32, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::bf16},
+          {0, 0, 0, 32, 64, 32, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::bf16, matrix_type::bf16},
           {8, 0, 0, 0, 16, 8, matrix_type::tf32, matrix_type::tf32,
            matrix_type::fp32, matrix_type::fp32},
       };
 
@@ -63,15 +63,17 @@ void matrix_multiply_ref(Ta *A, Tb *B, Tc *C, int M, int N, int K,
           if constexpr (std::is_same_v<Ta, bfloat16> &&
                         std::is_same_v<Tc, float>)
             acc += make_fp32(va[i]) * make_fp32(vb[i]);
+          else if constexpr (std::is_same_v<Ta, sycl::half> &&
+                             std::is_same_v<Tc, float>)
+            acc += (float)va[i] * (float)vb[i];
           else if constexpr (std::is_same_v<Ta, float> &&
                                  std::is_same_v<Tc, float> ||
                              std::is_integral_v<Ta> && std::is_integral_v<Tc> ||
+                             (std::is_same_v<Ta, bfloat16> ||
+                              std::is_same_v<Ta, sycl::half>) ||
                              (std::is_same_v<Ta, double> &&
                               std::is_same_v<Tc, double>))
             acc += va[i] * vb[i];
-          else if constexpr (std::is_same_v<Ta, sycl::half> &&
-                             std::is_same_v<Tc, float>)
-            acc += (float)va[i] * (float)vb[i];
           else
             assert(false && "Unsupported type in matrix_multiply_ref.");
         }