[SYCL] Disable _bf16 instrinsics for SYCL (#7338)

elizabethandrews · web-flow · commit 276b3c23fdda · 2022-11-10T14:43:08.000+01:00
This fixes compilation errors of immintrin.h
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
@@ -45,9 +45,11 @@ typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32)));
 typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32)));
 typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1)));
 
+#ifndef __SYCL_DEVICE_ONLY__
 typedef __bf16 __v16bf __attribute__((__vector_size__(32), __aligned__(32)));
 typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
 #endif
+#endif
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
diff --git a/clang/lib/Headers/avxneconvertintrin.h b/clang/lib/Headers/avxneconvertintrin.h
@@ -25,6 +25,7 @@
   __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"),   \
                  __min_vector_width__(256)))
 
+#ifndef __SYCL_DEVICE_ONLY__
 /// Convert scalar BF16 (16-bit) floating-point element
 /// stored at memory locations starting at location \a __A to a
 /// single-precision (32-bit) floating-point, broadcast it to packed
@@ -90,6 +91,7 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_bcstnebf16_ps(const void *__A) {
   return (__m256)__builtin_ia32_vbcstnebf162ps256((const __bf16 *)__A);
 }
+#endif
 
 /// Convert scalar half-precision (16-bit) floating-point element
 /// stored at memory locations starting at location \a __A to a
@@ -157,6 +159,7 @@ _mm256_bcstnesh_ps(const void *__A) {
   return (__m256)__builtin_ia32_vbcstnesh2ps256((const _Float16 *)__A);
 }
 
+#ifndef __SYCL_DEVICE_ONLY__
 /// Convert packed BF16 (16-bit) floating-point even-indexed elements
 /// stored at memory locations starting at location \a __A to packed
 /// single-precision (32-bit) floating-point elements, and store the results in
@@ -222,6 +225,7 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtneebf16_ps(const __m256bh *__A) {
   return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16bf *)__A);
 }
+#endif
 
 /// Convert packed half-precision (16-bit) floating-point even-indexed elements
 /// stored at memory locations starting at location \a __A to packed
@@ -289,6 +293,7 @@ _mm256_cvtneeph_ps(const __m256h *__A) {
   return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A);
 }
 
+#ifndef __SYCL_DEVICE_ONLY__
 /// Convert packed BF16 (16-bit) floating-point odd-indexed elements
 /// stored at memory locations starting at location \a __A to packed
 /// single-precision (32-bit) floating-point elements, and store the results in
@@ -354,6 +359,7 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtneobf16_ps(const __m256bh *__A) {
   return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16bf *)__A);
 }
+#endif
 
 /// Convert packed half-precision (16-bit) floating-point odd-indexed elements
 /// stored at memory locations starting at location \a __A to packed
@@ -421,6 +427,7 @@ _mm256_cvtneoph_ps(const __m256h *__A) {
   return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A);
 }
 
+#ifndef __SYCL_DEVICE_ONLY__
 /// Convert packed single-precision (32-bit) floating-point elements in \a __A
 /// to packed BF16 (16-bit) floating-point elements, and store the results in \a
 /// dst.
@@ -476,6 +483,7 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS256
 _mm256_cvtneps_avx_pbh(__m256 __A) {
   return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A);
 }
+#endif
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
@@ -44,9 +44,11 @@ typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
 typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
 typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
 
+#ifndef __SYCL_DEVICE_ONLY__
 typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16)));
 typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
 #endif
+#endif
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS                                                     \
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
@@ -229,6 +229,7 @@
 #include <avx512vlfp16intrin.h>
 #endif
 
+#ifndef __SYCL_DEVICE_ONLY__
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__AVX512BF16__)
 #include <avx512bf16intrin.h>
@@ -238,6 +239,7 @@
     (defined(__AVX512VL__) && defined(__AVX512BF16__))
 #include <avx512vlbf16intrin.h>
 #endif
+#endif
 
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__PKU__)
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
@@ -1527,13 +1527,9 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
     break;
   case DeclSpec::TST_half:    Result = Context.HalfTy; break;
   case DeclSpec::TST_BFloat16:
-    // Disable errors for SYCL and OpenMP device since definition of __bf16 is
-    // being moved to a shared header and it causes new errors emitted when
-    // host code is compiled with device compiler for SPIR target.
-    // FIXME: device code specific diagnostic is probably needed.
-    if (!S.Context.getTargetInfo().hasBFloat16Type() &&
-        !S.getLangOpts().SYCLIsDevice && !S.getLangOpts().OpenMPIsDevice)
-      S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "__bf16";
+    if (!S.Context.getTargetInfo().hasBFloat16Type())
+      S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
+	<< "__bf16";
     Result = Context.BFloat16Ty;
     break;
   case DeclSpec::TST_float:   Result = Context.FloatTy; break;
@@ -2730,15 +2726,8 @@ QualType Sema::BuildVectorType(QualType CurType, Expr *SizeExpr,
   }
 
   if (!TypeSize || VectorSizeBits % TypeSize) {
-    // Disable errors for SYCL and OpenMP device since definition of __bf16 is
-    // being moved to a shared header and it causes new errors emitted when
-    // host code is compiled with device compiler for SPIR target.
-    // FIXME: device code specific diagnostic is probably needed.
-    if (!(!TypeSize &&
-          (getLangOpts().OpenMPIsDevice || getLangOpts().SYCLIsDevice))) {
       Diag(AttrLoc, diag::err_attribute_invalid_size)
           << SizeExpr->getSourceRange();
-    }
     return QualType();
   }