@@ -714,10 +714,13 @@ bool LoopTreeOptimization::isSafeReadOnlyApply(BasicCalleeAnalysis *BCA, ApplyIn
714714
715715static void checkSideEffects (swift::SILInstruction &Inst,
716716 InstSet &SideEffectInsts,
717- SmallVectorImpl<SILInstruction *> &sideEffectsInBlock) {
717+ SmallVectorImpl<SILInstruction *> &sideEffectsInBlock,
718+ bool &hasOtherMemReadingInsts) {
718719 if (Inst.mayHaveSideEffects ()) {
719720 SideEffectInsts.insert (&Inst);
720721 sideEffectsInBlock.push_back (&Inst);
722+ } else if (Inst.mayReadFromMemory ()) {
723+ hasOtherMemReadingInsts = true ;
721724 }
722725}
723726
@@ -885,11 +888,15 @@ void LoopTreeOptimization::analyzeCurrentLoop(
885888 SmallVector<BeginAccessInst *, 8 > BeginAccesses;
886889 SmallVector<FullApplySite, 8 > fullApplies;
887890
891+  // True if the loop contains instructions which may read from memory and
892+  // which are tracked neither in `Loads` nor in `sideEffects`.
893+ bool hasOtherMemReadingInsts = false ;
894+
888895 for (auto *BB : Loop->getBlocks ()) {
889896 SmallVector<SILInstruction *, 8 > sideEffectsInBlock;
890897 for (auto &Inst : *BB) {
891898 if (hasOwnershipOperandsOrResults (&Inst)) {
892- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
899+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
893900 // Collect fullApplies to be checked in analyzeBeginAccess
894901 if (auto fullApply = FullApplySite::isa (&Inst)) {
895902 fullApplies.push_back (fullApply);
@@ -921,12 +928,12 @@ void LoopTreeOptimization::analyzeCurrentLoop(
921928 }
922929 Stores.push_back (store);
923930 LoadsAndStores.push_back (&Inst);
924- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
931+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
925932 break ;
926933 }
927934 case SILInstructionKind::BeginAccessInst:
928935 BeginAccesses.push_back (cast<BeginAccessInst>(&Inst));
929- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
936+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
930937 break ;
931938 case SILInstructionKind::RefElementAddrInst:
932939 SpecialHoist.push_back (cast<RefElementAddrInst>(&Inst));
@@ -937,7 +944,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
937944 // cond_fail that would have protected (executed before) a memory access
938945 // must - after hoisting - also be executed before said access.
939946 HoistUp.insert (&Inst);
940- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
947+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
941948 break ;
942949 case SILInstructionKind::ApplyInst: {
943950 auto *AI = cast<ApplyInst>(&Inst);
@@ -971,7 +978,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
971978 }
972979 }
973980
974- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
981+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
975982 if (canHoistUpDefault (&Inst, Loop, DomTree, RunsOnHighLevelSIL)) {
976983 HoistUp.insert (&Inst);
977984 }
@@ -1013,23 +1020,25 @@ void LoopTreeOptimization::analyzeCurrentLoop(
10131020 }
10141021 }
10151022
1016- // Collect memory locations for which we can move all loads and stores out
1017- // of the loop.
1018- //
1019- // Note: The Loads set and LoadsAndStores set may mutate during this loop.
1020- for (StoreInst *SI : Stores) {
1021- // Use AccessPathWithBase to recover a base address that can be used for
1022- // newly inserted memory operations. If we instead teach hoistLoadsAndStores
1023- // how to rematerialize global_addr, then we don't need this base.
1024- auto access = AccessPathWithBase::compute (SI->getDest ());
1025- auto accessPath = access.accessPath ;
1026- if (accessPath.isValid () &&
1027- (access.base && isLoopInvariant (access.base , Loop))) {
1028- if (isOnlyLoadedAndStored (AA, sideEffects, Loads, Stores, SI->getDest (),
1029- accessPath)) {
1030- if (!LoadAndStoreAddrs.count (accessPath)) {
1031- if (splitLoads (Loads, accessPath, SI->getDest ())) {
1032- LoadAndStoreAddrs.insert (accessPath);
1023+ if (!hasOtherMemReadingInsts) {
1024+ // Collect memory locations for which we can move all loads and stores out
1025+ // of the loop.
1026+ //
1027+ // Note: The Loads set and LoadsAndStores set may mutate during this loop.
1028+ for (StoreInst *SI : Stores) {
1029+ // Use AccessPathWithBase to recover a base address that can be used for
1030+ // newly inserted memory operations. If we instead teach hoistLoadsAndStores
1031+ // how to rematerialize global_addr, then we don't need this base.
1032+ auto access = AccessPathWithBase::compute (SI->getDest ());
1033+ auto accessPath = access.accessPath ;
1034+ if (accessPath.isValid () &&
1035+ (access.base && isLoopInvariant (access.base , Loop))) {
1036+ if (isOnlyLoadedAndStored (AA, sideEffects, Loads, Stores, SI->getDest (),
1037+ accessPath)) {
1038+ if (!LoadAndStoreAddrs.count (accessPath)) {
1039+ if (splitLoads (Loads, accessPath, SI->getDest ())) {
1040+ LoadAndStoreAddrs.insert (accessPath);
1041+ }
10331042 }
10341043 }
10351044 }
0 commit comments