@@ -6759,12 +6759,36 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6759
6759
return Builder.CreateCall(F, Ops, name);
6760
6760
}
6761
6761
6762
// Emits an FP8 NEON intrinsic call preceded by an FPM setup call.
//
// First emits a call to the `aarch64_set_fpmr` intrinsic with \p FPM as its
// only argument (its result is not used), then forwards \p F, \p Ops and
// \p name to EmitNeonCall and returns whatever that returns.
// NOTE(review): presumably the set_fpmr call programs the FP8 mode before the
// operation executes — confirm against the intrinsic's definition.
+ Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
6763
+ SmallVectorImpl<Value *> &Ops,
6764
+ Value *FPM, const char *name) {
6765
+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
6766
+ return EmitNeonCall(F, Ops, name);
6767
+ }
6768
+
6762
6769
// Builds the constant shift operand for a NEON shift.
//
// \p V is cast to ConstantInt and its sign-extended value is read; the result
// is a constant of type \p Ty holding that value, negated when \p neg is true.
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6763
6770
bool neg) {
6764
6771
// NOTE(review): the shift amount is held in an `int`; assumes it always fits
// — confirm against callers.
int SV = cast<ConstantInt>(V)->getSExtValue();
6765
6772
return ConstantInt::get(Ty, neg ? -SV : SV);
6766
6773
}
6767
6774
6775
// Emits an FP8 conversion intrinsic call for the builtin call expression \p E.
//
// The intrinsic \p IID is looked up with the overload types {\p Ty0, \p Ty1}.
// When \p Extract is true, the second overload type is replaced by a fixed
// vector of 8 x Int8Ty and Ops[0] is replaced in place by the subvector
// extracted from it at index 0, so \p Ops is modified for the caller as well.
// The last argument of \p E is emitted as the FPM value and passed, together
// with \p Ops and \p name, to EmitFP8NeonCall, whose result is returned.
+ Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
6776
+ llvm::Type *Ty1, bool Extract,
6777
+ SmallVectorImpl<llvm::Value *> &Ops,
6778
+ const CallExpr *E,
6779
+ const char *name) {
6780
+ llvm::Type *Tys[] = {Ty0, Ty1};
6781
+ if (Extract) {
6782
+ // Ops[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
6783
+ // the vector.
6784
+ Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
6785
+ Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
6786
+ }
// The FPM operand is taken from the final argument of the call expression.
6787
+ llvm::Value *FPM =
6788
+ EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
6789
+ return EmitFP8NeonCall(CGM.getIntrinsic(IID, Tys), Ops, FPM, name);
6790
+ }
6791
+
6768
6792
// Right-shift a vector by a constant.
6769
6793
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6770
6794
llvm::Type *Ty, bool usgn,
@@ -12736,6 +12760,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
12736
12760
return V;
12737
12761
12738
12762
unsigned Int;
12763
+ bool ExtractLow = false;
12739
12764
switch (BuiltinID) {
12740
12765
default: return nullptr;
12741
12766
case NEON::BI__builtin_neon_vbsl_v:
@@ -13950,7 +13975,59 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
13950
13975
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
13951
13976
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
13952
13977
}
13953
-
13978
+ case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
13979
+ ExtractLow = true;
13980
+ LLVM_FALLTHROUGH;
13981
+ case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
13982
+ case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
13983
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
13984
+ llvm::FixedVectorType::get(BFloatTy, 8),
13985
+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
13986
+ case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
13987
+ ExtractLow = true;
13988
+ LLVM_FALLTHROUGH;
13989
+ case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
13990
+ case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
13991
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
13992
+ llvm::FixedVectorType::get(BFloatTy, 8),
13993
+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
13994
+ case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
13995
+ ExtractLow = true;
13996
+ LLVM_FALLTHROUGH;
13997
+ case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
13998
+ case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
13999
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14000
+ llvm::FixedVectorType::get(HalfTy, 8),
14001
+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
14002
+ case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14003
+ ExtractLow = true;
14004
+ LLVM_FALLTHROUGH;
14005
+ case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14006
+ case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
14007
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14008
+ llvm::FixedVectorType::get(HalfTy, 8),
14009
+ Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
14010
+ case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
14011
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14012
+ llvm::FixedVectorType::get(Int8Ty, 8),
14013
+ Ops[0]->getType(), false, Ops, E, "vfcvtn");
14014
+ case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
14015
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14016
+ llvm::FixedVectorType::get(Int8Ty, 8),
14017
+ llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
14018
+ E, "vfcvtn");
14019
+ case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
14020
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14021
+ llvm::FixedVectorType::get(Int8Ty, 16),
14022
+ llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
14023
+ E, "vfcvtn");
14024
+ case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14025
+ llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
14026
+ Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
14027
+ Builder.getInt64(0));
14028
+ return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
14029
+ Ops[1]->getType(), false, Ops, E, "vfcvtn2");
14030
+ }
13954
14031
case NEON::BI__builtin_neon_vamin_f16:
13955
14032
case NEON::BI__builtin_neon_vaminq_f16:
13956
14033
case NEON::BI__builtin_neon_vamin_f32:
0 commit comments