@@ -6921,6 +6921,23 @@ Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
69216921 return ConstantInt::get(Ty, neg ? -SV : SV);
69226922}
69236923
6924+ Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
6925+ llvm::Type *Ty1, bool Extract,
6926+ SmallVectorImpl<llvm::Value *> &Ops,
6927+ const CallExpr *E,
6928+ const char *name) {
6929+ llvm::Type *Tys[] = {Ty0, Ty1};
6930+ if (Extract) {
6931+ // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
6932+ // the vector.
6933+ Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
6934+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
6935+ }
6936+ llvm::Value *FPM =
6937+ EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
6938+ return EmitFP8NeonCall(CGM.getIntrinsic(IID, Tys), Ops, FPM, name);
6939+ }
6940+
69246941// Right-shift a vector by a constant.
69256942Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
69266943 llvm::Type *Ty, bool usgn,
@@ -12874,6 +12891,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1287412891 return V;
1287512892
1287612893 unsigned Int;
12894+ bool ExtractLow = false;
1287712895 switch (BuiltinID) {
1287812896 default: return nullptr;
1287912897 case NEON::BI__builtin_neon_vbsl_v:
@@ -14088,117 +14106,58 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1408814106 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1408914107 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1409014108 }
14091- case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
1409214109 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14093- case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: {
14094- Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14095- llvm::Type *Tys[2];
14096- Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14097- // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower part of
14098- // the vector.
14099- if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14100- Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14101- /*isQuad*/ false));
14102- Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14103- } else
14104- Tys[1] = Ops[0]->getType();
14105- llvm::Value *FPM =
14106- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14107- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14108- }
14109- case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14110+ ExtractLow = true;
14111+ LLVM_FALLTHROUGH;
14112+ case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14113+ case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
14114+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14115+ llvm::FixedVectorType::get(BFloatTy, 8),
14116+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
1411014117 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14111- case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: {
14112- Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14113- llvm::Type *Tys[2];
14114- Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14115- // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14116- // part of the vector.
14117- if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14118- Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14119- /*isQuad*/ false));
14120- Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14121- } else
14122- Tys[1] = Ops[0]->getType();
14123- llvm::Value *FPM =
14124- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14125- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14126- }
14127- case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14118+ ExtractLow = true;
14119+ LLVM_FALLTHROUGH;
14120+ case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14121+ case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
14122+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14123+ llvm::FixedVectorType::get(BFloatTy, 8),
14124+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
1412814125 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14129- case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: {
14130- Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14131- llvm::Type *Tys[2];
14132- Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14133- // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14134- // part of the vector.
14135- if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm) {
14136- Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14137- /*isQuad*/ false));
14138- Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14139- } else
14140- Tys[1] = Ops[0]->getType();
14141- llvm::Value *FPM =
14142- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14143- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14144- }
14145- case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14126+ ExtractLow = true;
14127+ LLVM_FALLTHROUGH;
14128+ case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14129+ case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
14130+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14131+ llvm::FixedVectorType::get(HalfTy, 8),
14132+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
1414614133 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14147- case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: {
14148- Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14149- llvm::Type *Tys[2];
14150- Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14151- // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14152- // part of the vector.
14153- if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm) {
14154- Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14155- /*isQuad*/ false));
14156- Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14157- } else
14158- Tys[1] = Ops[0]->getType();
14159- llvm::Value *FPM =
14160- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14161- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14162- }
14163- case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: {
14164- Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14165- llvm::Type *Tys[2];
14166- Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14167- Tys[1] = Ops[0]->getType();
14168- llvm::Value *FPM =
14169- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14170- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14171- }
14172- case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: {
14173- Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14174- llvm::Type *Tys[2];
14175- Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14176- // Gets the expected type, because arm_neon.h casts float16x4_t to int8x8_t
14177- Tys[1] = llvm::FixedVectorType::get(HalfTy, 4);
14178- llvm::Value *FPM =
14179- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14180- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14181- }
14182- case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: {
14183- Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14184- llvm::Type *Tys[2];
14185- Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14186- // Gets the expected type, because arm_neon.h casts float16x8_t to int8x16_t
14187- Tys[1] = llvm::FixedVectorType::get(HalfTy, 8);
14188- llvm::Value *FPM =
14189- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14190- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14191- }
14134+ ExtractLow = true;
14135+ LLVM_FALLTHROUGH;
14136+ case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14137+ case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
14138+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14139+ llvm::FixedVectorType::get(HalfTy, 8),
14140+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
14141+ case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
14142+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14143+ llvm::FixedVectorType::get(Int8Ty, 8),
14144+ Ops[0]->getType(), false, Ops, E, "vfcvtn");
14145+ case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
14146+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14147+ llvm::FixedVectorType::get(Int8Ty, 8),
14148+ llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
14149+ E, "vfcvtn");
14150+ case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
14151+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14152+ llvm::FixedVectorType::get(Int8Ty, 16),
14153+ llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
14154+ E, "vfcvtn");
1419214155 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14193- Int = Intrinsic::aarch64_neon_fp8_fcvtn2;
14194- llvm::Type *Tys[2];
14195- Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14196- Tys[1] = Ops[1]->getType();
14197- Ops[0] = Builder.CreateInsertVector(Tys[0], PoisonValue::get(Tys[0]),
14198- Ops[0], Builder.getInt64(0));
14199- llvm::Value *FPM =
14200- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14201- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn2");
14156+ llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
14157+ Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
14158+ Builder.getInt64(0));
14159+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2,
14160+ Ty, Ops[1]->getType(), false, Ops, E, "vfcvtn2");
1420214161 }
1420314162 case NEON::BI__builtin_neon_vamin_f16:
1420414163 case NEON::BI__builtin_neon_vaminq_f16:
0 commit comments