@@ -237,6 +237,37 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
                     "Modulo Software Pipelining", false, false)
 
+namespace {
+
+/// This class holds an SUnit corresponding to a memory operation and other
+/// information related to the instruction.
+struct SUnitWithMemInfo {
+  SUnit *SU;
+  SmallVector<const Value *, 2> UnderlyingObjs;
+
+  /// The value of a memory operand.
+  const Value *MemOpValue = nullptr;
+
+  /// The offset of a memory operand.
+  int64_t MemOpOffset = 0;
+
+  AAMDNodes AATags;
+
+  /// True if all the underlying objects are identified.
+  bool IsAllIdentified = false;
+
+  SUnitWithMemInfo(SUnit *SU);
+
+  bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const;
+
+  bool isUnknown() const { return MemOpValue == nullptr; }
+
+private:
+  bool getUnderlyingObjects();
+};
+
+} // end anonymous namespace
+
 /// The "main" function for implementing Swing Modulo Scheduling.
 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
   if (skipFunction(mf.getFunction()))
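
Aside for reviewers: the struct above leans on two existing helpers, llvm::getUnderlyingObjects (ValueTracking) and llvm::isIdentifiedObject (AliasAnalysis). Below is a minimal, self-contained sketch of how they combine; allObjectsIdentified is a hypothetical name used only for illustration and is not part of this patch.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h" // isIdentifiedObject
#include "llvm/Analysis/ValueTracking.h" // getUnderlyingObjects
#include "llvm/IR/Value.h"

using namespace llvm;

// Walk a pointer back to the objects it may be based on and report whether
// every one of them is "identified" (an alloca, a global variable, a noalias
// argument, ...). Pointers based on two distinct identified objects cannot
// alias, which is the property isTriviallyDisjoint() exploits.
static bool allObjectsIdentified(const Value *Ptr,
                                 SmallVectorImpl<const Value *> &Objs) {
  getUnderlyingObjects(Ptr, Objs);
  return all_of(Objs, [](const Value *V) { return isIdentifiedObject(V); });
}
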
@@ -470,9 +501,10 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
   assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
 
+  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   SwingSchedulerDAG SMS(
       *this, L, getAnalysis<LiveIntervalsWrapperPass>().getLIS(), RegClassInfo,
-      II_setByPragma, LI.LoopPipelinerInfo.get());
+      II_setByPragma, LI.LoopPipelinerInfo.get(), AA);
 
   MachineBasicBlock *MBB = L.getHeader();
   // The kernel should not include any terminator instructions. These
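
Note: this hunk only shows the call site gaining the extra AA argument; the member that receives it is not visible here. Judging by the BAA queries in addLoopCarriedDependences below, the DAG presumably wraps the analysis in a BatchAAResults. A sketch of that assumed wiring follows; the class and member names are illustrative, not the patch's actual declarations.

#include "llvm/Analysis/AliasAnalysis.h" // AAResults, BatchAAResults

// Hypothetical shape of the receiving side: BatchAAResults caches repeated
// alias queries, which pays off because every pending load is tested against
// every later store in the loop body.
class SwingSchedulerDAGSketch {
  llvm::BatchAAResults BAA;

public:
  explicit SwingSchedulerDAGSketch(llvm::AAResults &AA) : BAA(AA) {}
};
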
@@ -560,9 +592,8 @@ void SwingSchedulerDAG::setMAX_II() {
 /// We override the schedule function in ScheduleDAGInstrs to implement the
 /// scheduling part of the Swing Modulo Scheduling algorithm.
 void SwingSchedulerDAG::schedule() {
-  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
   buildSchedGraph(AA);
-  addLoopCarriedDependences(AA);
+  addLoopCarriedDependences();
   updatePhiDependences();
   Topo.InitDAGTopologicalSorting();
   changeDependences();
@@ -810,113 +841,131 @@ static bool isDependenceBarrier(MachineInstr &MI) {
          (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
 }
 
-/// Return the underlying objects for the memory references of an instruction.
+SUnitWithMemInfo::SUnitWithMemInfo(SUnit *SU) : SU(SU) {
+  if (!getUnderlyingObjects())
+    return;
+  for (const Value *Obj : UnderlyingObjs)
+    if (!isIdentifiedObject(Obj)) {
+      IsAllIdentified = false;
+      break;
+    }
+}
+
+bool SUnitWithMemInfo::isTriviallyDisjoint(
+    const SUnitWithMemInfo &Other) const {
+  // If all underlying objects are identified objects and there is no overlap
+  // between them, then these two instructions are disjoint.
+  if (!IsAllIdentified || !Other.IsAllIdentified)
+    return false;
+  for (const Value *Obj : UnderlyingObjs)
+    if (llvm::is_contained(Other.UnderlyingObjs, Obj))
+      return false;
+  return true;
+}
+
+/// Collect the underlying objects for the memory references of an instruction.
 /// This function calls the code in ValueTracking, but first checks that the
 /// instruction has a memory operand.
-static void getUnderlyingObjects(const MachineInstr *MI,
-                                 SmallVectorImpl<const Value *> &Objs) {
+/// Returns false if we cannot find the underlying objects.
+bool SUnitWithMemInfo::getUnderlyingObjects() {
+  const MachineInstr *MI = SU->getInstr();
   if (!MI->hasOneMemOperand())
-    return;
+    return false;
   MachineMemOperand *MM = *MI->memoperands_begin();
   if (!MM->getValue())
-    return;
-  getUnderlyingObjects(MM->getValue(), Objs);
-  for (const Value *V : Objs) {
-    if (!isIdentifiedObject(V)) {
-      Objs.clear();
-      return;
-    }
-  }
+    return false;
+  MemOpValue = MM->getValue();
+  MemOpOffset = MM->getOffset();
+  llvm::getUnderlyingObjects(MemOpValue, UnderlyingObjs);
+
+  // TODO: A noalias scope may be valid only in a single iteration. In this
+  // case we need to peel it off like LoopAccessAnalysis does.
+  AATags = MM->getAAInfo();
+  return true;
 }
 
 /// Add a chain edge between a load and store if the store can be an
 /// alias of the load on a subsequent iteration, i.e., a loop carried
 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
 /// but that code doesn't create loop carried dependences.
-void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
-  MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
-  Value *UnknownValue =
-      UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
+void SwingSchedulerDAG::addLoopCarriedDependences() {
+  SmallVector<SUnitWithMemInfo, 4> PendingLoads;
   for (auto &SU : SUnits) {
     MachineInstr &MI = *SU.getInstr();
     if (isDependenceBarrier(MI))
       PendingLoads.clear();
     else if (MI.mayLoad()) {
-      SmallVector<const Value *, 4> Objs;
-      ::getUnderlyingObjects(&MI, Objs);
-      if (Objs.empty())
-        Objs.push_back(UnknownValue);
-      for (const auto *V : Objs) {
-        SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
-        SUs.push_back(&SU);
-      }
+      PendingLoads.emplace_back(&SU);
     } else if (MI.mayStore()) {
-      SmallVector<const Value *, 4> Objs;
-      ::getUnderlyingObjects(&MI, Objs);
-      if (Objs.empty())
-        Objs.push_back(UnknownValue);
-      for (const auto *V : Objs) {
-        MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
-            PendingLoads.find(V);
-        if (I == PendingLoads.end())
+      SUnitWithMemInfo Store(&SU);
+      for (const SUnitWithMemInfo &Load : PendingLoads) {
+        if (Load.isTriviallyDisjoint(Store))
           continue;
-        for (auto *Load : I->second) {
-          if (isSuccOrder(Load, &SU))
-            continue;
-          MachineInstr &LdMI = *Load->getInstr();
-          // First, perform the cheaper check that compares the base register.
-          // If they are the same and the load offset is less than the store
-          // offset, then mark the dependence as loop carried potentially.
-          const MachineOperand *BaseOp1, *BaseOp2;
-          int64_t Offset1, Offset2;
-          bool Offset1IsScalable, Offset2IsScalable;
-          if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
-                                           Offset1IsScalable, TRI) &&
-              TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
-                                           Offset2IsScalable, TRI)) {
-            if (BaseOp1->isIdenticalTo(*BaseOp2) &&
-                Offset1IsScalable == Offset2IsScalable &&
-                (int)Offset1 < (int)Offset2) {
-              assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
-                     "What happened to the chain edge?");
-              SDep Dep(Load, SDep::Barrier);
-              Dep.setLatency(1);
-              SU.addPred(Dep);
-              continue;
-            }
-          }
-          // Second, the more expensive check that uses alias analysis on the
-          // base registers. If they alias, and the load offset is less than
-          // the store offset, the mark the dependence as loop carried.
-          if (!AA) {
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-            continue;
-          }
-          MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
-          MachineMemOperand *MMO2 = *MI.memoperands_begin();
-          if (!MMO1->getValue() || !MMO2->getValue()) {
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-            continue;
-          }
-          if (MMO1->getValue() == MMO2->getValue() &&
-              MMO1->getOffset() <= MMO2->getOffset()) {
-            SDep Dep(Load, SDep::Barrier);
+        if (isSuccOrder(Load.SU, Store.SU))
+          continue;
+        MachineInstr &LdMI = *Load.SU->getInstr();
+        // First, perform the cheaper check that compares the base register.
+        // If they are the same and the load offset is less than the store
+        // offset, then mark the dependence as loop carried potentially.
+        const MachineOperand *BaseOp1, *BaseOp2;
+        int64_t Offset1, Offset2;
+        bool Offset1IsScalable, Offset2IsScalable;
+        if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
+                                         Offset1IsScalable, TRI) &&
+            TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
+                                         Offset2IsScalable, TRI)) {
+          if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+              Offset1IsScalable == Offset2IsScalable &&
+              (int)Offset1 < (int)Offset2) {
+            assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
+                   "What happened to the chain edge?");
+            SDep Dep(Load.SU, SDep::Barrier);
             Dep.setLatency(1);
             SU.addPred(Dep);
             continue;
           }
-          if (!AA->isNoAlias(
-                  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
-                  MemoryLocation::getAfter(MMO2->getValue(),
-                                           MMO2->getAAInfo()))) {
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-          }
+        }
+        // Second, the more expensive check that uses alias analysis on the
+        // base registers. If they alias, and the load offset is less than
+        // the store offset, then mark the dependence as loop carried.
+        if (Load.isUnknown() || Store.isUnknown()) {
+          SDep Dep(Load.SU, SDep::Barrier);
+          Dep.setLatency(1);
+          SU.addPred(Dep);
+          continue;
+        }
+        if (Load.MemOpValue == Store.MemOpValue &&
+            Load.MemOpOffset <= Store.MemOpOffset) {
+          SDep Dep(Load.SU, SDep::Barrier);
+          Dep.setLatency(1);
+          SU.addPred(Dep);
+          continue;
+        }
+
+        bool IsNoAlias = [&] {
+          if (BAA.isNoAlias(MemoryLocation::getBeforeOrAfter(Load.MemOpValue,
+                                                             Load.AATags),
+                            MemoryLocation::getBeforeOrAfter(Store.MemOpValue,
+                                                             Store.AATags)))
+            return true;
+
+          // AliasAnalysis sometimes gives up on following the underlying
+          // object. In such a case, separate checks for underlying objects may
+          // prove that there are no aliases between two accesses.
+          for (const Value *LoadObj : Load.UnderlyingObjs)
+            for (const Value *StoreObj : Store.UnderlyingObjs)
+              if (!BAA.isNoAlias(
+                      MemoryLocation::getBeforeOrAfter(LoadObj, Load.AATags),
+                      MemoryLocation::getBeforeOrAfter(StoreObj, Store.AATags)))
+                return false;
+
+          return true;
+        }();
+
+        if (!IsNoAlias) {
+          SDep Dep(Load.SU, SDep::Barrier);
+          Dep.setLatency(1);
+          SU.addPred(Dep);
         }
       }
     }
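
For context on the queries used above: MemoryLocation::getBeforeOrAfter describes a location of unknown extent anywhere within the given object, so the loop-carried check does not depend on access size or offset within that object, and BatchAAResults caches repeated queries across the load/store pairs. A stand-alone sketch of the same query pattern is below; objectsMayOverlap is a made-up helper for illustration only.

#include "llvm/Analysis/AliasAnalysis.h"  // BatchAAResults
#include "llvm/Analysis/MemoryLocation.h" // MemoryLocation
#include "llvm/IR/Metadata.h"             // AAMDNodes

using namespace llvm;

// Returns true when the two whole-object locations might overlap, mirroring
// the conservative fallback taken when the cheap base-register and
// same-value checks above are inconclusive.
static bool objectsMayOverlap(BatchAAResults &BAA, const Value *A,
                              const AAMDNodes &ATags, const Value *B,
                              const AAMDNodes &BTags) {
  return !BAA.isNoAlias(MemoryLocation::getBeforeOrAfter(A, ATags),
                        MemoryLocation::getBeforeOrAfter(B, BTags));
}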