@@ -6908,6 +6908,13 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
69086908 return Builder.CreateCall(F, Ops, name);
69096909}
69106910
6911+ Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
6912+ SmallVectorImpl<Value *> &Ops,
6913+ Value *FPM, const char *name) {
6914+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
6915+ return EmitNeonCall(F, Ops, name);
6916+ }
6917+
69116918Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
69126919 bool neg) {
69136920 int SV = cast<ConstantInt>(V)->getSExtValue();
@@ -14081,7 +14088,118 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1408114088 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1408214089 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1408314090 }
14084-
14091+ case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14092+ case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14093+ case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: {
14094+ Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14095+ llvm::Type *Tys[2];
14096+ Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14097+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower part of
14098+ // the vector.
14099+ if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14100+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14101+ /*isQuad*/ false));
14102+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14103+ } else
14104+ Tys[1] = Ops[0]->getType();
14105+ llvm::Value *FPM =
14106+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14107+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14108+ }
14109+ case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14110+ case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14111+ case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: {
14112+ Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14113+ llvm::Type *Tys[2];
14114+ Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14115+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14116+ // part of the vector.
14117+ if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14118+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14119+ /*isQuad*/ false));
14120+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14121+ } else
14122+ Tys[1] = Ops[0]->getType();
14123+ llvm::Value *FPM =
14124+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14125+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14126+ }
14127+ case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14128+ case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14129+ case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: {
14130+ Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14131+ llvm::Type *Tys[2];
14132+ Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14133+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14134+ // part of the vector.
14135+ if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14136+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14137+ /*isQuad*/ false));
14138+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14139+ } else
14140+ Tys[1] = Ops[0]->getType();
14141+ llvm::Value *FPM =
14142+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14143+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14144+ }
14145+ case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14146+ case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14147+ case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: {
14148+ Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14149+ llvm::Type *Tys[2];
14150+ Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14151+ // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14152+ // part of the vector.
14153+ if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14154+ Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14155+ /*isQuad*/ false));
14156+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14157+ } else
14158+ Tys[1] = Ops[0]->getType();
14159+ llvm::Value *FPM =
14160+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14161+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14162+ }
14163+ case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: {
14164+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14165+ llvm::Type *Tys[2];
14166+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14167+ Tys[1] = Ops[0]->getType();
14168+ llvm::Value *FPM =
14169+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14170+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14171+ }
14172+ case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: {
14173+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14174+ llvm::Type *Tys[2];
14175+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14176+ // Gets the expected type, because arm_neon.h casts float16x4_t to int8x8_t
14177+ Tys[1] = llvm::FixedVectorType::get(HalfTy, 4);
14178+ llvm::Value *FPM =
14179+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14180+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14181+ }
14182+ case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: {
14183+ Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14184+ llvm::Type *Tys[2];
14185+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14186+ // Gets the expected type, because arm_neon.h casts float16x8_t to int8x16_t
14187+ Tys[1] = llvm::FixedVectorType::get(HalfTy, 8);
14188+ llvm::Value *FPM =
14189+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14190+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14191+ }
14192+ case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14193+ Int = Intrinsic::aarch64_neon_fp8_fcvtn2;
14194+ llvm::Type *Tys[2];
14195+ Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14196+ Tys[1] = Ops[1]->getType();
14197+ Ops[0] = Builder.CreateInsertVector(Tys[0], PoisonValue::get(Tys[0]),
14198+ Ops[0], Builder.getInt64(0));
14199+ llvm::Value *FPM =
14200+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14201+ return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn2");
14202+ }
1408514203 case NEON::BI__builtin_neon_vamin_f16:
1408614204 case NEON::BI__builtin_neon_vaminq_f16:
1408714205 case NEON::BI__builtin_neon_vamin_f32:
0 commit comments