@@ -6881,6 +6881,23 @@ Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
68816881 return ConstantInt::get(Ty, neg ? -SV : SV);
68826882}
68836883
6884+ Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
6885+ llvm::Type *Ty1, bool Extract,
6886+ SmallVectorImpl<llvm::Value *> &Ops,
6887+ const CallExpr *E,
6888+ const char *name) {
6889+ llvm::Type *Tys[] = {Ty0, Ty1};
6890+ if (Extract) {
6891+ // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
6892+ // the vector.
6893+ Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
6894+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
6895+ }
6896+ llvm::Value *FPM =
6897+ EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
6898+ return EmitFP8NeonCall(CGM.getIntrinsic(IID, Tys), Ops, FPM, name);
6899+ }
6900+
68846901// Right-shift a vector by a constant.
68856902Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
68866903 llvm::Type *Ty, bool usgn,
@@ -12804,6 +12821,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1280412821 return V;
1280512822
1280612823 unsigned Int;
12824+ bool ExtractLow = false;
1280712825 switch (BuiltinID) {
1280812826 default: return nullptr;
1280912827 case NEON::BI__builtin_neon_vbsl_v:
@@ -14018,117 +14036,58 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1401814036 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1401914037 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1402014038 }
14021- case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
1402214039 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14023- case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: {
14024- Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14025- llvm::Type *Tys[2];
14026- Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14027- // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower part of
14028- // the vector.
14029- if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14030- Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14031- /*isQuad*/ false));
14032- Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14033- } else
14034- Tys[1] = Ops[0]->getType();
14035- llvm::Value *FPM =
14036- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14037- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14038- }
14039- case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14040+ ExtractLow = true;
14041+ LLVM_FALLTHROUGH;
14042+ case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14043+ case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
14044+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14045+ llvm::FixedVectorType::get(BFloatTy, 8),
14046+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
1404014047 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14041- case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: {
14042- Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14043- llvm::Type *Tys[2];
14044- Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14045- // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14046- // part of the vector.
14047- if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14048- Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14049- /*isQuad*/ false));
14050- Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14051- } else
14052- Tys[1] = Ops[0]->getType();
14053- llvm::Value *FPM =
14054- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14055- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14056- }
14057- case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14048+ ExtractLow = true;
14049+ LLVM_FALLTHROUGH;
14050+ case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14051+ case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
14052+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14053+ llvm::FixedVectorType::get(BFloatTy, 8),
14054+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
1405814055 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14059- case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: {
14060- Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14061- llvm::Type *Tys[2];
14062- Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14063- // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14064- // part of the vector.
14065- if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm) {
14066- Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14067- /*isQuad*/ false));
14068- Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14069- } else
14070- Tys[1] = Ops[0]->getType();
14071- llvm::Value *FPM =
14072- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14073- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14074- }
14075- case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14056+ ExtractLow = true;
14057+ LLVM_FALLTHROUGH;
14058+ case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14059+ case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
14060+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14061+ llvm::FixedVectorType::get(HalfTy, 8),
14062+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
1407614063 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14077- case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: {
14078- Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14079- llvm::Type *Tys[2];
14080- Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14081- // Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14082- // part of the vector.
14083- if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm) {
14084- Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14085- /*isQuad*/ false));
14086- Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14087- } else
14088- Tys[1] = Ops[0]->getType();
14089- llvm::Value *FPM =
14090- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14091- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14092- }
14093- case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: {
14094- Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14095- llvm::Type *Tys[2];
14096- Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14097- Tys[1] = Ops[0]->getType();
14098- llvm::Value *FPM =
14099- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14100- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14101- }
14102- case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: {
14103- Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14104- llvm::Type *Tys[2];
14105- Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14106- // Gets the expected type, because arm_neon.h casts float16x4_t to int8x8_t
14107- Tys[1] = llvm::FixedVectorType::get(HalfTy, 4);
14108- llvm::Value *FPM =
14109- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14110- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14111- }
14112- case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: {
14113- Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14114- llvm::Type *Tys[2];
14115- Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14116- // Gets the expected type, because arm_neon.h casts float16x8_t to int8x16_t
14117- Tys[1] = llvm::FixedVectorType::get(HalfTy, 8);
14118- llvm::Value *FPM =
14119- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14120- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14121- }
14064+ ExtractLow = true;
14065+ LLVM_FALLTHROUGH;
14066+ case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14067+ case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
14068+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14069+ llvm::FixedVectorType::get(HalfTy, 8),
14070+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
14071+ case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
14072+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14073+ llvm::FixedVectorType::get(Int8Ty, 8),
14074+ Ops[0]->getType(), false, Ops, E, "vfcvtn");
14075+ case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
14076+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14077+ llvm::FixedVectorType::get(Int8Ty, 8),
14078+ llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
14079+ E, "vfcvtn");
14080+ case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
14081+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14082+ llvm::FixedVectorType::get(Int8Ty, 16),
14083+ llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
14084+ E, "vfcvtn");
1412214085 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14123- Int = Intrinsic::aarch64_neon_fp8_fcvtn2;
14124- llvm::Type *Tys[2];
14125- Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14126- Tys[1] = Ops[1]->getType();
14127- Ops[0] = Builder.CreateInsertVector(Tys[0], PoisonValue::get(Tys[0]),
14128- Ops[0], Builder.getInt64(0));
14129- llvm::Value *FPM =
14130- EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14131- return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn2");
14086+ llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
14087+ Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
14088+ Builder.getInt64(0));
14089+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2,
14090+ Ty, Ops[1]->getType(), false, Ops, E, "vfcvtn2");
1413214091 }
1413314092 case NEON::BI__builtin_neon_vamin_f16:
1413414093 case NEON::BI__builtin_neon_vaminq_f16:
0 commit comments