@@ -884,6 +884,63 @@ bool SUnitWithMemInfo::getUnderlyingObjects() {
884884 return true ;
885885}
886886
887+ // / Returns true if there is a loop-carried order dependency from \p Src to \p
888+ // / Dst.
889+ static bool hasLoopCarriedMemDep (const SUnitWithMemInfo &Src,
890+ const SUnitWithMemInfo &Dst,
891+ BatchAAResults &BAA,
892+ const TargetInstrInfo *TII,
893+ const TargetRegisterInfo *TRI) {
894+ if (Src.isTriviallyDisjoint (Dst))
895+ return false ;
896+ if (isSuccOrder (Src.SU , Dst.SU ))
897+ return false ;
898+
899+ MachineInstr &SrcMI = *Src.SU ->getInstr ();
900+ MachineInstr &DstMI = *Dst.SU ->getInstr ();
901+ // First, perform the cheaper check that compares the base register.
902+ // If they are the same and the load offset is less than the store
903+ // offset, then mark the dependence as loop carried potentially.
904+ const MachineOperand *BaseOp1, *BaseOp2;
905+ int64_t Offset1, Offset2;
906+ bool Offset1IsScalable, Offset2IsScalable;
907+ if (TII->getMemOperandWithOffset (SrcMI, BaseOp1, Offset1, Offset1IsScalable,
908+ TRI) &&
909+ TII->getMemOperandWithOffset (DstMI, BaseOp2, Offset2, Offset2IsScalable,
910+ TRI)) {
911+ if (BaseOp1->isIdenticalTo (*BaseOp2) &&
912+ Offset1IsScalable == Offset2IsScalable && (int )Offset1 < (int )Offset2) {
913+ assert (TII->areMemAccessesTriviallyDisjoint (SrcMI, DstMI) &&
914+ " What happened to the chain edge?" );
915+ return true ;
916+ }
917+ }
918+
919+ // Second, the more expensive check that uses alias analysis on the
920+ // base registers. If they alias, and the load offset is less than
921+ // the store offset, the mark the dependence as loop carried.
922+ if (Src.isUnknown () || Dst.isUnknown ())
923+ return true ;
924+ if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset )
925+ return true ;
926+
927+ if (BAA.isNoAlias (
928+ MemoryLocation::getBeforeOrAfter (Src.MemOpValue , Src.AATags ),
929+ MemoryLocation::getBeforeOrAfter (Dst.MemOpValue , Dst.AATags )))
930+ return false ;
931+
932+ // AliasAnalysis sometimes gives up on following the underlying
933+ // object. In such a case, separate checks for underlying objects may
934+ // prove that there are no aliases between two accesses.
935+ for (const Value *SrcObj : Src.UnderlyingObjs )
936+ for (const Value *DstObj : Dst.UnderlyingObjs )
937+ if (!BAA.isNoAlias (MemoryLocation::getBeforeOrAfter (SrcObj, Src.AATags ),
938+ MemoryLocation::getBeforeOrAfter (DstObj, Dst.AATags )))
939+ return true ;
940+
941+ return false ;
942+ }
943+
887944// / Add a chain edge between a load and store if the store can be an
888945// / alias of the load on a subsequent iteration, i.e., a loop carried
889946// / dependence. This code is very similar to the code in ScheduleDAGInstrs
@@ -898,76 +955,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences() {
898955 PendingLoads.emplace_back (&SU);
899956 } else if (MI.mayStore ()) {
900957 SUnitWithMemInfo Store (&SU);
901- for (const SUnitWithMemInfo &Load : PendingLoads) {
902- if (Load.isTriviallyDisjoint (Store))
903- continue ;
904- if (isSuccOrder (Load.SU , Store.SU ))
905- continue ;
906- MachineInstr &LdMI = *Load.SU ->getInstr ();
907- // First, perform the cheaper check that compares the base register.
908- // If they are the same and the load offset is less than the store
909- // offset, then mark the dependence as loop carried potentially.
910- const MachineOperand *BaseOp1, *BaseOp2;
911- int64_t Offset1, Offset2;
912- bool Offset1IsScalable, Offset2IsScalable;
913- if (TII->getMemOperandWithOffset (LdMI, BaseOp1, Offset1,
914- Offset1IsScalable, TRI) &&
915- TII->getMemOperandWithOffset (MI, BaseOp2, Offset2,
916- Offset2IsScalable, TRI)) {
917- if (BaseOp1->isIdenticalTo (*BaseOp2) &&
918- Offset1IsScalable == Offset2IsScalable &&
919- (int )Offset1 < (int )Offset2) {
920- assert (TII->areMemAccessesTriviallyDisjoint (LdMI, MI) &&
921- " What happened to the chain edge?" );
922- SDep Dep (Load.SU , SDep::Barrier);
923- Dep.setLatency (1 );
924- SU.addPred (Dep);
925- continue ;
926- }
927- }
928- // Second, the more expensive check that uses alias analysis on the
929- // base registers. If they alias, and the load offset is less than
930- // the store offset, the mark the dependence as loop carried.
931- if (Load.isUnknown () || Store.isUnknown ()) {
932- SDep Dep (Load.SU , SDep::Barrier);
933- Dep.setLatency (1 );
934- SU.addPred (Dep);
935- continue ;
936- }
937- if (Load.MemOpValue == Store.MemOpValue &&
938- Load.MemOpOffset <= Store.MemOpOffset ) {
939- SDep Dep (Load.SU , SDep::Barrier);
940- Dep.setLatency (1 );
941- SU.addPred (Dep);
942- continue ;
943- }
944-
945- bool IsNoAlias = [&] {
946- if (BAA.isNoAlias (MemoryLocation::getBeforeOrAfter (Load.MemOpValue ,
947- Load.AATags ),
948- MemoryLocation::getBeforeOrAfter (Store.MemOpValue ,
949- Store.AATags )))
950- return true ;
951-
952- // AliasAnalysis sometimes gives up on following the underlying
953- // object. In such a case, separate checks for underlying objects may
954- // prove that there are no aliases between two accesses.
955- for (const Value *LoadObj : Load.UnderlyingObjs )
956- for (const Value *StoreObj : Store.UnderlyingObjs )
957- if (!BAA.isNoAlias (
958- MemoryLocation::getBeforeOrAfter (LoadObj, Load.AATags ),
959- MemoryLocation::getBeforeOrAfter (StoreObj, Store.AATags )))
960- return false ;
961-
962- return true ;
963- }();
964-
965- if (!IsNoAlias) {
958+ for (const SUnitWithMemInfo &Load : PendingLoads)
959+ if (hasLoopCarriedMemDep (Load, Store, BAA, TII, TRI)) {
966960 SDep Dep (Load.SU , SDep::Barrier);
967961 Dep.setLatency (1 );
968962 SU.addPred (Dep);
969963 }
970- }
971964 }
972965 }
973966}
0 commit comments