@@ -808,14 +808,16 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
808808
// Match predicate for a G_BUILD_VECTOR (the opcode is asserted on entry).
// Reading the `+` side of this hunk: returns true when getAArch64VectorSplat
// reports a splat for which isReg() holds, or a splat whose getCst() value is
// neither 0 nor -1; returns false otherwise (including when no splat is found).
// The `-` side is the previous form: bail out when isBuildVectorAllZeros or
// isBuildVectorAllOnes holds, then report whether any splat was found.
809809bool matchBuildVectorToDup (MachineInstr &MI, MachineRegisterInfo &MRI) {
810810 assert (MI.getOpcode () == TargetOpcode::G_BUILD_VECTOR);
811-
811+ auto Splat = getAArch64VectorSplat (MI, MRI);
// No splat was found for this instruction: nothing to match.
812+ if (!Splat)
813+ return false ;
// When isReg() holds the match succeeds immediately; the 0 / -1 filter below
// is only reached for the remaining (constant) case.
814+ if (Splat->isReg ())
815+ return true ;
812816 // Later, during selection, we'll try to match imported patterns using
813817 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
814818 // G_BUILD_VECTORs which could match those patterns.
815- if (isBuildVectorAllZeros (MI, MRI) || isBuildVectorAllOnes (MI, MRI))
816- return false ;
817-
818- return getAArch64VectorSplat (MI, MRI).has_value ();
// Constant splat: reject the values 0 and -1. On the `+` side these two
// comparisons stand in for the removed isBuildVectorAllZeros /
// isBuildVectorAllOnes checks.
// NOTE(review): presumably -1 is the sign-extended all-ones element value;
// confirm against what getCst() returns.
819+ int64_t Cst = Splat->getCst ();
820+ return (Cst != 0 && Cst != -1 );
819821}
820822
821823void applyBuildVectorToDup (MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -931,40 +933,58 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
931933
932934// / \returns a function which builds a vector floating point compare instruction
933935// / for a condition code \p CC.
936+ // / \param [in] IsZero - True if the comparison is against 0.
934937// / \param [in] NoNans - True if the target has NoNansFPMath.
// Review notes (describing the `+` side of this hunk):
//  - The returned callable takes a MachineIRBuilder, emits the compare through
//    it and returns getReg(0) of the instruction it built (for NE, of the
//    buildNot that wraps the compare).
//  - With IsZero set, each case emits the single-operand *Z opcode on LHS only;
//    RHS is then used solely by the type assertion at the top of the function.
//  - Every lambda captures LHS, RHS, IsZero and DstTy by value.
//  - NoNans is not read anywhere in the lines shown here.
//  - Condition codes other than NE/EQ/GE/GT/LS/MI reach llvm_unreachable.
//  - The `-` lines are the previous form, which had no IsZero parameter and
//    always built the two-operand compare.
935938std::function<Register(MachineIRBuilder &)>
936- getVectorFCMP (AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans ,
937- MachineRegisterInfo &MRI) {
939+ getVectorFCMP (AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero ,
940+ bool NoNans, MachineRegisterInfo &MRI) {
938941 LLT DstTy = MRI.getType (LHS);
939942 assert (DstTy.isVector () && " Expected vector types only?" );
940943 assert (DstTy == MRI.getType (RHS) && " Src and Dst types must match!" );
941944 switch (CC) {
942945 default :
943946 llvm_unreachable (" Unexpected condition code!" );
// NE: emit the equality compare (G_FCMEQZ on LHS when IsZero, otherwise
// G_FCMEQ on LHS, RHS) and invert the result with buildNot.
944947 case AArch64CC::NE:
945- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
946- auto FCmp = MIB.buildInstr (AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
948+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
949+ auto FCmp = IsZero
950+ ? MIB.buildInstr (AArch64::G_FCMEQZ, {DstTy}, {LHS})
951+ : MIB.buildInstr (AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
947952 return MIB.buildNot (DstTy, FCmp).getReg (0 );
948953 };
// EQ: G_FCMEQZ on LHS when IsZero, otherwise G_FCMEQ on LHS, RHS.
949954 case AArch64CC::EQ:
950- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
951- return MIB.buildInstr (AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg (0 );
955+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
956+ return IsZero
957+ ? MIB.buildInstr (AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg (0 )
958+ : MIB.buildInstr (AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
959+ .getReg (0 );
952960 };
// GE: G_FCMGEZ on LHS when IsZero, otherwise G_FCMGE on LHS, RHS.
953961 case AArch64CC::GE:
954- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
955- return MIB.buildInstr (AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg (0 );
962+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
963+ return IsZero
964+ ? MIB.buildInstr (AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg (0 )
965+ : MIB.buildInstr (AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
966+ .getReg (0 );
956967 };
// GT: G_FCMGTZ on LHS when IsZero, otherwise G_FCMGT on LHS, RHS.
957968 case AArch64CC::GT:
958- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
959- return MIB.buildInstr (AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg (0 );
969+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
970+ return IsZero
971+ ? MIB.buildInstr (AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg (0 )
972+ : MIB.buildInstr (AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
973+ .getReg (0 );
960974 };
// LS: the two-operand form is G_FCMGE with the operands swapped (RHS, LHS);
// the zero form is G_FCMLEZ on LHS.
961975 case AArch64CC::LS:
962- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
963- return MIB.buildInstr (AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg (0 );
976+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
977+ return IsZero
978+ ? MIB.buildInstr (AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg (0 )
979+ : MIB.buildInstr (AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
980+ .getReg (0 );
964981 };
// MI: the two-operand form is G_FCMGT with the operands swapped (RHS, LHS);
// the zero form is G_FCMLTZ on LHS.
965982 case AArch64CC::MI:
966- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
967- return MIB.buildInstr (AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg (0 );
983+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
984+ return IsZero
985+ ? MIB.buildInstr (AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg (0 )
986+ : MIB.buildInstr (AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
987+ .getReg (0 );
968988 };
969989 }
970990}
@@ -1004,17 +1024,23 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10041024
10051025 LLT DstTy = MRI.getType (Dst);
10061026
1027+ auto Splat = getAArch64VectorSplat (*MRI.getVRegDef (RHS), MRI);
1028+
1029+ // Compares against 0 have special target-specific pseudos.
1030+ bool IsZero = Splat && Splat->isCst () && Splat->getCst () == 0 ;
1031+
10071032 bool Invert = false ;
10081033 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
10091034 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
10101035 Pred == CmpInst::Predicate::FCMP_UNO) &&
1011- isBuildVectorAllZeros (*MRI. getVRegDef (RHS), MRI) ) {
1036+ IsZero ) {
10121037 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
10131038 // NaN, so equivalent to a == a and doesn't need the two comparisons an
10141039 // "ord" normally would.
10151040 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
10161041 // thus equivalent to a != a.
10171042 RHS = LHS;
1043+ IsZero = false ;
10181044 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
10191045 } else
10201046 changeVectorFCMPPredToAArch64CC (Pred, CC, CC2, Invert);
@@ -1025,12 +1051,12 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10251051 const bool NoNans =
10261052 ST.getTargetLowering ()->getTargetMachine ().Options .NoNaNsFPMath ;
10271053
1028- auto Cmp = getVectorFCMP (CC, LHS, RHS, NoNans, MRI);
1054+ auto Cmp = getVectorFCMP (CC, LHS, RHS, IsZero, NoNans, MRI);
10291055 Register CmpRes;
10301056 if (CC2 == AArch64CC::AL)
10311057 CmpRes = Cmp (MIB);
10321058 else {
1033- auto Cmp2 = getVectorFCMP (CC2, LHS, RHS, NoNans, MRI);
1059+ auto Cmp2 = getVectorFCMP (CC2, LHS, RHS, IsZero, NoNans, MRI);
10341060 auto Cmp2Dst = Cmp2 (MIB);
10351061 auto Cmp1Dst = Cmp (MIB);
10361062 CmpRes = MIB.buildOr (DstTy, Cmp1Dst, Cmp2Dst).getReg (0 );
0 commit comments