@@ -6868,6 +6868,13 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
68686868 return Builder.CreateCall(F, Ops, name);
68696869}
68706870
6871+ Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
6872+ SmallVectorImpl<Value *> &Ops,
6873+ Value *FPM, const char *name) {
6874+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
6875+ return EmitNeonCall(F, Ops, name);
6876+ }
6877+
68716878Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
68726879 bool neg) {
68736880 int SV = cast<ConstantInt>(V)->getSExtValue();
@@ -14002,7 +14009,118 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1400214009 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1400314010 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1400414011 }
14005-
14012+ case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14013+ case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14014+ case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: {
14015+ Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14016+ llvm::Type *Tys[2];
14017+ Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14018+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower part of
14019+ // the vector.
14020+ if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14021+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14022+ /*isQuad*/ false));
14023+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14024+ } else
14025+ Tys[1] = Ops[0]->getType();
14026+ llvm::Value *FPM =
14027+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14028+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14029+ }
14030+ case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14031+ case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14032+ case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: {
14033+ Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14034+ llvm::Type *Tys[2];
14035+ Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14036+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14037+ // part of the vector.
14038+ if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14039+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14040+ /*isQuad*/ false));
14041+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14042+ } else
14043+ Tys[1] = Ops[0]->getType();
14044+ llvm::Value *FPM =
14045+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14046+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14047+ }
14048+ case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14049+ case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14050+ case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: {
14051+ Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14052+ llvm::Type *Tys[2];
14053+ Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14054+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14055+ // part of the vector.
14056+ if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14057+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14058+ /*isQuad*/ false));
14059+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14060+ } else
14061+ Tys[1] = Ops[0]->getType();
14062+ llvm::Value *FPM =
14063+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14064+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14065+ }
14066+ case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14067+ case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14068+ case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: {
14069+ Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14070+ llvm::Type *Tys[2];
14071+ Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14072+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14073+ // part of the vector.
14074+ if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14075+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14076+ /*isQuad*/ false));
14077+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14078+ } else
14079+ Tys[1] = Ops[0]->getType();
14080+ llvm::Value *FPM =
14081+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14082+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14083+ }
14084+ case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: {
14085+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14086+ llvm::Type *Tys[2];
14087+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14088+ Tys[1] = Ops[0]->getType();
14089+ llvm::Value *FPM =
14090+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14091+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14092+ }
14093+ case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: {
14094+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14095+ llvm::Type *Tys[2];
14096+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14097+ // Gets the expected type, because arm_neon.h casts float16x4_t to int8x8_t
14098+ Tys[1] = llvm::FixedVectorType::get(HalfTy, 4);
14099+ llvm::Value *FPM =
14100+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14101+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14102+ }
14103+ case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: {
14104+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14105+ llvm::Type *Tys[2];
14106+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14107+ // Gets the expected type, because arm_neon.h casts float16x8_t to int8x16_t
14108+ Tys[1] = llvm::FixedVectorType::get(HalfTy, 8);
14109+ llvm::Value *FPM =
14110+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14111+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14112+ }
14113+ case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14114+ Int = Intrinsic::aarch64_neon_fp8_fcvtn2;
14115+ llvm::Type *Tys[2];
14116+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14117+ Tys[1] = Ops[1]->getType();
14118+ Ops[0] = Builder.CreateInsertVector(Tys[0], PoisonValue::get(Tys[0]),
14119+ Ops[0], Builder.getInt64(0));
14120+ llvm::Value *FPM =
14121+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14122+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn2");
14123+ }
1400614124 case NEON::BI__builtin_neon_vamin_f16:
1400714125 case NEON::BI__builtin_neon_vaminq_f16:
1400814126 case NEON::BI__builtin_neon_vamin_f32:
0 commit comments