@@ -945,42 +945,164 @@ void CombinerHelper::applySextInRegOfLoad(
   MI.eraseFromParent();
 }
 
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+  if (Ty.isVector())
+    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+                                Ty.getNumElements());
+  return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+/// Return true if 'MI' is a load or a store whose address operand may be
+/// folded into the load/store addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
+                                    MachineRegisterInfo &MRI) {
+  TargetLowering::AddrMode AM;
+  auto *MF = MI->getMF();
+  auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+  if (!Addr)
+    return false;
+
+  AM.HasBaseReg = true;
+  if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
+    AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+  else
+    AM.Scale = 1; // [reg +/- reg]
+
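+  // Ask the target whether the [reg +/- imm] or [reg +/- reg] mode built above
+  // is natively supported for this memory type and address space.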
+  return TLI.isLegalAddressingMode(
+      MF->getDataLayout(), AM,
+      getTypeForLLT(MI->getMMO().getMemoryType(),
+                    MF->getFunction().getContext()),
+      MI->getMMO().getAddrSpace());
+}
+
+static unsigned getIndexedOpc(unsigned LdStOpc) {
+  switch (LdStOpc) {
+  case TargetOpcode::G_LOAD:
+    return TargetOpcode::G_INDEXED_LOAD;
+  case TargetOpcode::G_STORE:
+    return TargetOpcode::G_INDEXED_STORE;
+  case TargetOpcode::G_ZEXTLOAD:
+    return TargetOpcode::G_INDEXED_ZEXTLOAD;
+  case TargetOpcode::G_SEXTLOAD:
+    return TargetOpcode::G_INDEXED_SEXTLOAD;
+  default:
+    llvm_unreachable("Unexpected opcode");
+  }
+}
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+  // Check for legality.
+  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+  LLT Ty = MRI.getType(LdSt.getReg(0));
+  LLT MemTy = LdSt.getMMO().getMemoryType();
+  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+      {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+  unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+  SmallVector<LLT> OpTys;
+  if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+    OpTys = {PtrTy, Ty, Ty};
+  else
+    OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+  LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+  return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+    "post-index-use-threshold", cl::Hidden, cl::init(32),
+    cl::desc("Number of uses of a base pointer to check before it is no longer "
+             "considered for post-indexing."));
+
 bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
-                                            Register &Base, Register &Offset) {
-  auto &MF = *LdSt.getParent()->getParent();
-  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+                                            Register &Base, Register &Offset,
+                                            bool &RematOffset) {
+  // We're looking for the following pattern, for either load or store:
+  // %baseptr:_(p0) = ...
+  // G_STORE %val(s64), %baseptr(p0)
+  // %offset:_(s64) = G_CONSTANT i64 -256
+  // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
+  const auto &TLI = getTargetLowering();
+
+  Register Ptr = LdSt.getPointerReg();
+  // If the store is the only use, don't bother.
+  if (MRI.hasOneNonDBGUse(Ptr))
+    return false;
 
-  Base = LdSt.getPointerReg();
+  if (!isIndexedLoadStoreLegal(LdSt))
+    return false;
 
-  if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Base, MRI))
+  if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
     return false;
 
-  // FIXME: The following use traversal needs a bail out for patholigical cases.
-  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
+  MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+  auto *PtrDef = MRI.getVRegDef(Ptr);
+
+  unsigned NumUsesChecked = 0;
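+  // Walk the users of the pointer looking for a G_PTR_ADD that computes an
+  // address we could instead produce as a post-increment of this load/store.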
+  for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+    if (++NumUsesChecked > PostIndexUseThreshold)
+      return false; // Try to avoid exploding compile time.
+
     auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
-    if (!PtrAdd)
+    // The use itself might be dead. This can happen during combines if DCE
+    // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+    if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
+      continue;
+
+    // Check that the user of this isn't the store, otherwise we'd generate an
+    // indexed store defining its own use.
+    if (StoredValDef == &Use)
       continue;
 
     Offset = PtrAdd->getOffsetReg();
     if (!ForceLegalIndexing &&
-        !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ false, MRI))
+        !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+                             /*IsPre*/ false, MRI))
       continue;
 
     // Make sure the offset calculation is before the potentially indexed op.
     MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
-    if (!dominates(*OffsetDef, LdSt))
-      continue;
+    RematOffset = false;
+    if (!dominates(*OffsetDef, LdSt)) {
+      // If the offset is just a G_CONSTANT, however, we can always
+      // rematerialize it where we need it.
+      if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+        continue;
+      RematOffset = true;
+    }
 
-    // FIXME: check whether all uses of Base are load/store with foldable
-    // addressing modes. If so, using the normal addr-modes is better than
-    // forming an indexed one.
-    if (any_of(MRI.use_nodbg_instructions(PtrAdd->getReg(0)),
-               [&](MachineInstr &PtrAddUse) {
-                 return !dominates(LdSt, PtrAddUse);
-               }))
-      continue;
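+    // Check the other users of the base pointer: prefer post-indexing a later
+    // indexable load/store instead, prefer plain addressing modes when the
+    // offset add can be folded into them, and require that this load/store
+    // dominates the remaining uses.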
+    for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+      if (&BasePtrUse == PtrDef)
+        continue;
+
+      // If the user is a later load/store that can be post-indexed, then don't
+      // combine this one.
+      auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+      if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
+          dominates(LdSt, *BasePtrLdSt) &&
+          isIndexedLoadStoreLegal(*BasePtrLdSt))
+        return false;
+
+      // Now we're looking for the key G_PTR_ADD instruction, which contains
+      // the offset add that we want to fold.
+      if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+        Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+        for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+          // If the use is in a different block, then we may produce worse code
+          // due to the extra register pressure.
+          if (BaseUseUse.getParent() != LdSt.getParent())
+            return false;
+
+          if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
+            if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+              return false;
+        }
+        if (!dominates(LdSt, BasePtrUse))
+          return false; // All uses must be dominated by the load/store.
+      }
+    }
 
     Addr = PtrAdd->getReg(0);
+    Base = PtrAdd->getBaseReg();
     return true;
   }
 
@@ -1001,6 +1123,9 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
       !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
     return false;
 
+  if (!isIndexedLoadStoreLegal(LdSt))
+    return false;
+
   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
     return false;
@@ -1027,16 +1152,14 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
   auto &LdSt = cast<GLoadStore>(MI);
 
-  // For now, no targets actually support these opcodes so don't waste time
-  // running these unless we're forced to for testing.
-  if (!ForceLegalIndexing)
+  if (LdSt.isAtomic())
     return false;
 
   MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                                           MatchInfo.Offset);
   if (!MatchInfo.IsPre &&
       !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
-                              MatchInfo.Offset))
+                              MatchInfo.Offset, MatchInfo.RematOffset))
     return false;
 
   return true;
@@ -1045,28 +1168,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
 void CombinerHelper::applyCombineIndexedLoadStore(
     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
-  MachineIRBuilder MIRBuilder(MI);
+  Builder.setInstrAndDebugLoc(MI);
   unsigned Opcode = MI.getOpcode();
   bool IsStore = Opcode == TargetOpcode::G_STORE;
-  unsigned NewOpcode;
-  switch (Opcode) {
-  case TargetOpcode::G_LOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
-    break;
-  case TargetOpcode::G_SEXTLOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
-    break;
-  case TargetOpcode::G_ZEXTLOAD:
-    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
-    break;
-  case TargetOpcode::G_STORE:
-    NewOpcode = TargetOpcode::G_INDEXED_STORE;
-    break;
-  default:
-    llvm_unreachable("Unknown load/store opcode");
+  unsigned NewOpcode = getIndexedOpc(Opcode);
+
+  // If the offset constant didn't happen to dominate the load/store, we can
+  // just clone it as needed.
+  if (MatchInfo.RematOffset) {
+    auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+    auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+                                        *OldCst->getOperand(1).getCImm());
+    MatchInfo.Offset = NewCst.getReg(0);
   }
 
-  auto MIB = MIRBuilder.buildInstr(NewOpcode);
+  auto MIB = Builder.buildInstr(NewOpcode);
   if (IsStore) {
     MIB.addDef(MatchInfo.Addr);
     MIB.addUse(MI.getOperand(0).getReg());
@@ -1245,13 +1361,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
   Observer.changedInstr(*BrCond);
 }
 
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
-  if (Ty.isVector())
-    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
-                                Ty.getNumElements());
-  return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
 bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
   MachineIRBuilder HelperBuilder(MI);
   GISelObserverWrapper DummyObserver;