@@ -6868,6 +6868,13 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
68686868 return Builder.CreateCall(F, Ops, name);
68696869}
68706870
6871+ Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
6872+ SmallVectorImpl<Value *> &Ops,
6873+ Value *FPM, const char *name) {
6874+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
6875+ return EmitNeonCall(F, Ops, name);
6876+ }
6877+
68716878Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
68726879 bool neg) {
68736880 int SV = cast<ConstantInt>(V)->getSExtValue();
@@ -14011,7 +14018,118 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1401114018 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1401214019 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1401314020 }
14014-
14021+ case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14022+ case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14023+ case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: {
14024+ Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14025+ llvm::Type *Tys[2];
14026+ Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14027+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower part of
14028+ // the vector.
14029+ if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14030+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14031+ /*isQuad*/ false));
14032+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14033+ } else
14034+ Tys[1] = Ops[0]->getType();
14035+ llvm::Value *FPM =
14036+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14037+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14038+ }
14039+ case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14040+ case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14041+ case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: {
14042+ Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14043+ llvm::Type *Tys[2];
14044+ Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14045+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14046+ // part of the vector.
14047+ if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14048+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14049+ /*isQuad*/ false));
14050+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14051+ } else
14052+ Tys[1] = Ops[0]->getType();
14053+ llvm::Value *FPM =
14054+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14055+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14056+ }
14057+ case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14058+ case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14059+ case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: {
14060+ Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14061+ llvm::Type *Tys[2];
14062+ Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14063+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14064+ // part of the vector.
14065+ if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14066+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14067+ /*isQuad*/ false));
14068+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14069+ } else
14070+ Tys[1] = Ops[0]->getType();
14071+ llvm::Value *FPM =
14072+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14073+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14074+ }
14075+ case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14076+ case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14077+ case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: {
14078+ Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14079+ llvm::Type *Tys[2];
14080+ Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14081+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14082+ // part of the vector.
14083+ if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14084+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14085+ /*isQuad*/ false));
14086+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14087+ } else
14088+ Tys[1] = Ops[0]->getType();
14089+ llvm::Value *FPM =
14090+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14091+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14092+ }
14093+ case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: {
14094+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14095+ llvm::Type *Tys[2];
14096+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14097+ Tys[1] = Ops[0]->getType();
14098+ llvm::Value *FPM =
14099+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14100+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14101+ }
14102+ case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: {
14103+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14104+ llvm::Type *Tys[2];
14105+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14106+ // Gets the expected type, because arm_neon.h casts float16x4_t to int8x8_t
14107+ Tys[1] = llvm::FixedVectorType::get(HalfTy, 4);
14108+ llvm::Value *FPM =
14109+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14110+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14111+ }
14112+ case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: {
14113+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14114+ llvm::Type *Tys[2];
14115+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14116+ // Gets the expected type, because arm_neon.h casts float16x8_t to int8x16_t
14117+ Tys[1] = llvm::FixedVectorType::get(HalfTy, 8);
14118+ llvm::Value *FPM =
14119+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14120+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14121+ }
14122+ case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14123+ Int = Intrinsic::aarch64_neon_fp8_fcvtn2;
14124+ llvm::Type *Tys[2];
14125+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14126+ Tys[1] = Ops[1]->getType();
14127+ Ops[0] = Builder.CreateInsertVector(Tys[0], PoisonValue::get(Tys[0]),
14128+ Ops[0], Builder.getInt64(0));
14129+ llvm::Value *FPM =
14130+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14131+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn2");
14132+ }
1401514133 case NEON::BI__builtin_neon_vamin_f16:
1401614134 case NEON::BI__builtin_neon_vaminq_f16:
1401714135 case NEON::BI__builtin_neon_vamin_f32:
0 commit comments