FP8 bitcast

momchil-velikov · momchil-velikov · commit 42b36c55b3d1 · 2025-01-13T17:04:28.000Z
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3415,6 +3415,8 @@ def err_typecheck_vector_not_convertable : Error<
   "cannot convert between vector values of different size (%0 and %1)">;
 def err_typecheck_vector_not_convertable_non_scalar : Error<
   "cannot convert between vector and non-scalar values (%0 and %1)">;
+def err_typecheck_vector_not_convertable_non_vector : Error<
+  "cannot convert between vector type %0 and non-vector type %1">;
 def err_typecheck_vector_lengths_not_equal : Error<
   "vector operands do not have the same number of elements (%0 and %1)">;
 def warn_typecheck_vector_element_sizes_not_equal : Warning<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
@@ -7429,6 +7429,11 @@ class Sema final : public SemaBase {
   /// the perspective of SVE bitcasts.
   bool isValidSveBitcast(QualType srcType, QualType destType);
 
+  /// Check for bitcast between a builtin Neon vector type and a regular
+  /// vector type or an integral scalar.
+  bool isValidNeonVectorBuiltinTypeBitcast(SourceRange OpRange, QualType SrcTy,
+                                           QualType DstTy);
+
   /// Are the two types matrix types and do they have the same dimensions i.e.
   /// do they have the same number of rows and the same number of columns?
   bool areMatrixTypesOfTheSameDimension(QualType srcTy, QualType destTy);
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
@@ -2386,6 +2386,16 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
     return TC_Success;
   }
 
+  // Allow bitcasting between a regular vector type or a scalar, and a builtin
+  // Neon vector type.
+  if ((SrcType->isNeonVectorBuiltinType() ||
+       DestType->isNeonVectorBuiltinType())) {
+    if (!Self.isValidNeonVectorBuiltinTypeBitcast(OpRange, SrcType, DestType))
+      return TC_Failed;
+    Kind = CK_BitCast;
+    return TC_Success;
+  }
+
   // Allow reinterpret_casts between vectors of the same size and
   // between vectors and integers of the same size.
   bool destIsVector = DestType->isVectorType();
@@ -3009,6 +3019,18 @@ void CastOperation::CheckCStyleCast() {
     return;
   }
 
+  // Allow bitcasting between a regular vector type or a scalar, and a builtin
+  // Neon vector type.
+  if (SrcType->isNeonVectorBuiltinType() ||
+      DestType->isNeonVectorBuiltinType()) {
+    if (!Self.isValidNeonVectorBuiltinTypeBitcast(OpRange, SrcType, DestType)) {
+      SrcExpr = ExprError();
+      return;
+    }
+    Kind = CK_BitCast;
+    return;
+  }
+
   // Allow bitcasting between compatible RVV vector types.
   if ((SrcType->isVectorType() || DestType->isVectorType()) &&
       Self.RISCV().isValidRVVBitcast(SrcType, DestType)) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
@@ -7530,6 +7530,54 @@ bool Sema::isValidSveBitcast(QualType srcTy, QualType destTy) {
          ValidScalableConversion(destTy, srcTy);
 }
 
+bool Sema::isValidNeonVectorBuiltinTypeBitcast(SourceRange OpRange,
+                                               QualType SrcTy, QualType DstTy) {
+  assert(SrcTy->isNeonVectorBuiltinType() || DstTy->isNeonVectorBuiltinType());
+
+  // Returns 0 when BT (the builtin Neon vector type) may be bitcast to or from
+  // OT (the other type); otherwise returns the ID of the diagnostic to emit.
+  auto checkCast = [&](QualType BT, QualType OT) -> unsigned {
+    if (OT->isNeonVectorBuiltinType()) {
+      if (BT.getCanonicalType() != OT.getCanonicalType())
+        return diag::err_invalid_conversion_between_vectors;
+      return 0;
+    }
+
+    if (!OT->isVectorType()) {
+      if (!OT->isScalarType())
+        return diag::err_typecheck_vector_not_convertable_non_scalar;
+      if (!OT->isIntegralType(Context))
+        return diag::err_typecheck_vector_not_convertable_non_vector;
+    }
+
+    uint64_t OTLen = 1;
+    QualType OTEltTy = OT;
+    if (const VectorType *VT = OT->getAs<VectorType>()) {
+      OTLen = VT->getNumElements();
+      OTEltTy = VT->getElementType();
+    }
+
+    uint64_t BTLen =
+        BT->getAs<BuiltinType>()->getKind() == BuiltinType::MFloat8x8 ? 8u
+                                                                      : 16u;
+    if (BTLen * 8u != OTLen * Context.getTypeSize(OTEltTy))
+      return diag::err_invalid_conversion_between_vectors;
+    return 0;
+  };
+
+  QualType BT = SrcTy;
+  QualType OT = DstTy;
+  if (!SrcTy->isNeonVectorBuiltinType())
+    std::swap(BT, OT);
+
+  // Emit the error, then report failure explicitly: callers reject the cast
+  // only when this function returns false.
+  unsigned DiagID = checkCast(BT, OT);
+  if (DiagID)
+    Diag(OpRange.getBegin(), DiagID) << BT << OT << OpRange;
+  return DiagID == 0;
+}
+
 bool Sema::areMatrixTypesOfTheSameDimension(QualType srcTy, QualType destTy) {
   if (!destTy->isMatrixType() || !srcTy->isMatrixType())
     return false;
diff --git a/clang/test/CodeGen/AArch64/fp8-cast.c b/clang/test/CodeGen/AArch64/fp8-cast.c
@@ -0,0 +1,193 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// Bitcast between FP8 Neon vectors
+// CHECK-LABEL: define dso_local <8 x i8> @test_f8_f8(
+// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <8 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z10test_f8_f8u13__MFloat8x8_t(
+// CHECK-CXX-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <8 x i8> [[X]]
+//
+mfloat8x8_t test_f8_f8(mfloat8x8_t x) {
+    return (mfloat8x8_t) x;
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @testq_f8_f8(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <16 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z11testq_f8_f8u14__MFloat8x16_t(
+// CHECK-CXX-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <16 x i8> [[X]]
+//
+mfloat8x16_t testq_f8_f8(mfloat8x16_t x) {
+    return (mfloat8x16_t) x;
+}
+
+// Bitcast between FP8 and int8 Neon vectors
+// CHECK-LABEL: define dso_local <8 x i8> @test_f8_s8(
+// CHECK-SAME: <8 x i8> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <8 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z10test_f8_s810__Int8x8_t(
+// CHECK-CXX-SAME: <8 x i8> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <8 x i8> [[X]]
+//
+mfloat8x8_t test_f8_s8(int8x8_t x) {
+    return (mfloat8x8_t) x;
+}
+
+// CHECK-LABEL: define dso_local <8 x i8> @test_s8_f8(
+// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <8 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local noundef <8 x i8> @_Z10test_s8_f8u13__MFloat8x8_t(
+// CHECK-CXX-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <8 x i8> [[X]]
+//
+int8x8_t test_s8_f8(mfloat8x8_t x) {
+    return (int8x8_t) x;
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @testq_f8_s8(
+// CHECK-SAME: <16 x i8> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <16 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z11testq_f8_s811__Int8x16_t(
+// CHECK-CXX-SAME: <16 x i8> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <16 x i8> [[X]]
+//
+mfloat8x16_t testq_f8_s8(int8x16_t x) {
+    return (mfloat8x16_t) x;
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @testq_s8_f8(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <16 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local noundef <16 x i8> @_Z11testq_s8_f8u14__MFloat8x16_t(
+// CHECK-CXX-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <16 x i8> [[X]]
+//
+int8x16_t testq_s8_f8(mfloat8x16_t x) {
+    return (int8x16_t) x;
+}
+
+// Bitcast between FP8 and float32 Neon vectors
+// CHECK-LABEL: define dso_local <8 x i8> @test_f8_f32(
+// CHECK-SAME: <2 x float> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <8 x i8>
+// CHECK-NEXT:    ret <8 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z11test_f8_f3213__Float32x2_t(
+// CHECK-CXX-SAME: <2 x float> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <8 x i8>
+// CHECK-CXX-NEXT:    ret <8 x i8> [[TMP0]]
+//
+mfloat8x8_t test_f8_f32(float32x2_t x) {
+    return (mfloat8x8_t) x;
+}
+
+// CHECK-LABEL: define dso_local <2 x float> @test_f32_f8(
+// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[X]] to <2 x float>
+// CHECK-NEXT:    ret <2 x float> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local noundef <2 x float> @_Z11test_f32_f8u13__MFloat8x8_t(
+// CHECK-CXX-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[X]] to <2 x float>
+// CHECK-CXX-NEXT:    ret <2 x float> [[TMP0]]
+//
+float32x2_t test_f32_f8(mfloat8x8_t x) {
+    return (float32x2_t) x;
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @testq_f8_f32(
+// CHECK-SAME: <4 x float> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[X]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z12testq_f8_f3213__Float32x4_t(
+// CHECK-CXX-SAME: <4 x float> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[X]] to <16 x i8>
+// CHECK-CXX-NEXT:    ret <16 x i8> [[TMP0]]
+//
+mfloat8x16_t testq_f8_f32(float32x4_t x) {
+    return (mfloat8x16_t) x;
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @testq_f32_f8(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[X]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z12testq_f32_f8u14__MFloat8x16_t(
+// CHECK-CXX-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[X]] to <4 x float>
+// CHECK-CXX-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t testq_f32_f8(mfloat8x16_t x) {
+    return (float32x4_t) x;
+}
+
+// Bitcast between FP8 and poly128_t (which is integral)
+// CHECK-LABEL: define dso_local <16 x i8> @testq_f8_p128(
+// CHECK-SAME: i128 noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[X]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z13testq_f8_p128o(
+// CHECK-CXX-SAME: i128 noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = bitcast i128 [[X]] to <16 x i8>
+// CHECK-CXX-NEXT:    ret <16 x i8> [[TMP0]]
+//
+mfloat8x16_t testq_f8_p128(poly128_t x) {
+    return (mfloat8x16_t) x;
+}
+
+// CHECK-LABEL: define dso_local i128 @testq_p128_f8(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[X]] to i128
+// CHECK-NEXT:    ret i128 [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local noundef i128 @_Z13testq_p128_f8u14__MFloat8x16_t(
+// CHECK-CXX-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[X]] to i128
+// CHECK-CXX-NEXT:    ret i128 [[TMP0]]
+//
+poly128_t testq_p128_f8(mfloat8x16_t x) {
+    return (poly128_t) x;
+}
diff --git a/clang/test/Sema/aarch64-fp8-cast.c b/clang/test/Sema/aarch64-fp8-cast.c