@@ -6766,6 +6766,24 @@ Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
67666766 return EmitNeonCall(F, Ops, name);
67676767}
67686768
6769+ llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
6770+ unsigned IID, bool ExtendLane, llvm::Type *RetTy,
6771+ SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
6772+
6773+ const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
6774+ RetTy->getPrimitiveSizeInBits();
6775+ llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
6776+ Ops[1]->getType()};
6777+ if (ExtendLane) {
6778+ auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
6779+ Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
6780+ Builder.getInt64(0));
6781+ }
6782+ llvm::Value *FPM =
6783+ EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
6784+ return EmitFP8NeonCall(CGM.getIntrinsic(IID, Tys), Ops, FPM, name);
6785+ }
6786+
67696787Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
67706788 bool neg) {
67716789 int SV = cast<ConstantInt>(V)->getSExtValue();
@@ -12761,6 +12779,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1276112779
1276212780 unsigned Int;
1276312781 bool ExtractLow = false;
12782+ bool ExtendLane = false;
1276412783 switch (BuiltinID) {
1276512784 default: return nullptr;
1276612785 case NEON::BI__builtin_neon_vbsl_v:
@@ -14028,6 +14047,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1402814047 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
1402914048 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
1403014049 }
14050+
14051+ case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
14052+ case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
14053+ return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
14054+ Ops, E, "fdot2");
14055+ case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
14056+ case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
14057+ ExtendLane = true;
14058+ LLVM_FALLTHROUGH;
14059+ case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
14060+ case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
14061+ return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
14062+ ExtendLane, HalfTy, Ops, E, "fdot2_lane");
14063+ case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
14064+ case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
14065+ return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
14066+ FloatTy, Ops, E, "fdot4");
14067+ case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
14068+ case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
14069+ ExtendLane = true;
14070+ LLVM_FALLTHROUGH;
14071+ case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
14072+ case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
14073+ return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
14074+ ExtendLane, FloatTy, Ops, E, "fdot4_lane");
1403114075 case NEON::BI__builtin_neon_vamin_f16:
1403214076 case NEON::BI__builtin_neon_vaminq_f16:
1403314077 case NEON::BI__builtin_neon_vamin_f32:
0 commit comments