Skip to content

Commit a8c0455

Browse files
authored
Merge pull request swiftlang#30789 from atrick/fix-licm-combined-ldst
Fix LICM combined load/store hoisting/sinking optimization.
2 parents 41cc3f3 + 1c12de3 commit a8c0455

File tree

3 files changed

+252
-5
lines changed

3 files changed

+252
-5
lines changed

lib/SILOptimizer/LoopTransforms/LICM.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,23 @@ static LoadInst *isLoadFromAddr(SILInstruction *I, SILValue addr) {
103103
/// Returns true if all instructions in \p SideEffectInsts which may alias with
104104
/// \p addr are either loads or stores from \p addr.
105105
static bool isOnlyLoadedAndStored(AliasAnalysis *AA, InstSet &SideEffectInsts,
106+
ArrayRef<LoadInst *> Loads,
107+
ArrayRef<StoreInst *> Stores,
106108
SILValue addr) {
107109
for (auto *I : SideEffectInsts) {
108110
if (AA->mayReadOrWriteMemory(I, addr) &&
109111
!isStoreToAddr(I, addr) && !isLoadFromAddr(I, addr)) {
110112
return false;
111113
}
112114
}
115+
for (auto *LI : Loads) {
116+
if (AA->mayReadFromMemory(LI, addr) && !isLoadFromAddr(LI, addr))
117+
return false;
118+
}
119+
for (auto *SI : Stores) {
120+
if (AA->mayWriteToMemory(SI, addr) && !isStoreToAddr(SI, addr))
121+
return false;
122+
}
113123
return true;
114124
}
115125

@@ -869,7 +879,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
869879
for (StoreInst *SI : Stores) {
870880
SILValue addr = SI->getDest();
871881
if (isLoopInvariant(addr, Loop) &&
872-
isOnlyLoadedAndStored(AA, sideEffects, addr)) {
882+
isOnlyLoadedAndStored(AA, sideEffects, Loads, Stores, addr)) {
873883
LoadAndStoreAddrs.insert(addr);
874884
}
875885
}
@@ -1015,6 +1025,7 @@ void LoopTreeOptimization::hoistLoadsAndStores(SILValue addr, SILLoop *loop, Ins
10151025
SILBuilder B(preheader->getTerminator());
10161026
auto *initialLoad = B.createLoad(preheader->getTerminator()->getLoc(), addr,
10171027
LoadOwnershipQualifier::Unqualified);
1028+
LLVM_DEBUG(llvm::dbgs() << "Creating preload " << *initialLoad);
10181029

10191030
SILSSAUpdater ssaUpdater;
10201031
ssaUpdater.Initialize(initialLoad->getType());
@@ -1047,6 +1058,7 @@ void LoopTreeOptimization::hoistLoadsAndStores(SILValue addr, SILLoop *loop, Ins
10471058
currentVal = SILValue();
10481059
}
10491060
if (auto *SI = isStoreToAddr(I, addr)) {
1061+
LLVM_DEBUG(llvm::dbgs() << "Deleting reloaded store " << *SI);
10501062
currentVal = SI->getSrc();
10511063
toDelete.push_back(SI);
10521064
} else if (auto *LI = isLoadFromAddr(I, addr)) {
@@ -1056,6 +1068,8 @@ void LoopTreeOptimization::hoistLoadsAndStores(SILValue addr, SILLoop *loop, Ins
10561068
currentVal = ssaUpdater.GetValueInMiddleOfBlock(block);
10571069
SILValue projectedValue = projectLoadValue(LI->getOperand(), addr,
10581070
currentVal, LI);
1071+
LLVM_DEBUG(llvm::dbgs() << "Replacing stored load " << *LI << " with "
1072+
<< projectedValue);
10591073
LI->replaceAllUsesWith(projectedValue);
10601074
toDelete.push_back(LI);
10611075
}
@@ -1068,8 +1082,11 @@ void LoopTreeOptimization::hoistLoadsAndStores(SILValue addr, SILLoop *loop, Ins
10681082
assert(succ->getSinglePredecessorBlock() &&
10691083
"should have split critical edges");
10701084
SILBuilder B(succ->begin());
1071-
B.createStore(loc.getValue(), ssaUpdater.GetValueInMiddleOfBlock(succ),
1072-
addr, StoreOwnershipQualifier::Unqualified);
1085+
auto *SI = B.createStore(loc.getValue(),
1086+
ssaUpdater.GetValueInMiddleOfBlock(succ),
1087+
addr, StoreOwnershipQualifier::Unqualified);
1088+
(void)SI;
1089+
LLVM_DEBUG(llvm::dbgs() << "Creating loop-exit store " << *SI);
10731090
}
10741091
}
10751092
}

test/SILOptimizer/licm.sil

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,3 +561,233 @@ bb6: // Preds: bb3 bb5
561561
return %19 : $() // id: %20
562562
} // end sil function 'hoist_loads_and_stores'
563563

564+
// ==================================================================
565+
// Test combined load/store hoisting/sinking with aliases
566+
567+
struct Index {
568+
@_hasStorage var value: Int64 { get set }
569+
}
570+
571+
// -----------------------------------------------------------------------------
572+
// Test combined load/store hoisting/sinking with obvious aliasing loads
573+
574+
// CHECK-LABEL: sil shared @testCombinedLdStAliasingLoad : $@convention(method) (Int64) -> Int64 {
575+
// CHECK: bb0(%0 : $Int64):
576+
// CHECK: store
577+
// CHECK-NOT: {{(load|store)}}
578+
// CHECK: bb1:
579+
// CHECK-NEXT: load %{{.*}} : $*Builtin.Int64
580+
// CHECK-NEXT: store %{{.*}} to %{{.*}} : $*Int64
581+
// CHECK-NEXT: load %{{.*}} : $*Builtin.Int64
582+
// CHECK-NEXT: cond_br
583+
// CHECK-NOT: {{(load|store)}}
584+
// CHECK-LABEL: } // end sil function 'testCombinedLdStAliasingLoad'
585+
sil shared @testCombinedLdStAliasingLoad : $@convention(method) (Int64) -> Int64 {
586+
bb0(%0 : $Int64):
587+
%zero = integer_literal $Builtin.Int64, 0
588+
%intz = struct $Int64(%zero : $Builtin.Int64)
589+
%stackAddr = alloc_stack $Index
590+
%outerAddr1 = struct_element_addr %stackAddr : $*Index, #Index.value
591+
store %intz to %outerAddr1 : $*Int64
592+
%innerAddr1 = struct_element_addr %outerAddr1 : $*Int64, #Int64._value
593+
%outerAddr2 = struct_element_addr %stackAddr : $*Index, #Index.value
594+
%innerAddr2 = struct_element_addr %outerAddr2 : $*Int64, #Int64._value
595+
br bb1
596+
597+
bb1:
598+
%val = load %innerAddr2 : $*Builtin.Int64
599+
%intv = struct $Int64(%zero : $Builtin.Int64)
600+
store %intv to %outerAddr2 : $*Int64
601+
%val2 = load %innerAddr1 : $*Builtin.Int64
602+
cond_br undef, bb2, bb3
603+
604+
bb2:
605+
br bb1
606+
607+
bb3:
608+
dealloc_stack %stackAddr : $*Index
609+
%result = struct $Int64(%val2 : $Builtin.Int64)
610+
return %result : $Int64
611+
}
612+
613+
// -----------------------------------------------------------------------------
614+
// Test combined load/store hoisting/sinking with obvious aliasing stores
615+
616+
// CHECK-LABEL: sil shared @testCombinedLdStAliasingStore : $@convention(method) (Int64) -> Int64 {
617+
// CHECK: bb0(%0 : $Int64):
618+
// CHECK: store
619+
// CHECK-NOT: {{(load|store)}}
620+
// CHECK: bb1:
621+
// CHECK-NEXT: load %{{.*}} : $*Builtin.Int64
622+
// CHECK-NEXT: store %{{.*}} to %{{.*}} : $*Int64
623+
// CHECK-NEXT: store %{{.*}} to %{{.*}} : $*Builtin.Int64
624+
// CHECK-NEXT: cond_br
625+
// CHECK-NOT: {{(load|store)}}
626+
// CHECK: load
627+
// CHECK-NOT: {{(load|store)}}
628+
// CHECK-LABEL: } // end sil function 'testCombinedLdStAliasingStore'
629+
sil shared @testCombinedLdStAliasingStore : $@convention(method) (Int64) -> Int64 {
630+
bb0(%0 : $Int64):
631+
%zero = integer_literal $Builtin.Int64, 0
632+
%intz = struct $Int64(%zero : $Builtin.Int64)
633+
%stackAddr = alloc_stack $Index
634+
%outerAddr1 = struct_element_addr %stackAddr : $*Index, #Index.value
635+
store %intz to %outerAddr1 : $*Int64
636+
%innerAddr1 = struct_element_addr %outerAddr1 : $*Int64, #Int64._value
637+
%outerAddr2 = struct_element_addr %stackAddr : $*Index, #Index.value
638+
%innerAddr2 = struct_element_addr %outerAddr2 : $*Int64, #Int64._value
639+
br bb1
640+
641+
bb1:
642+
%val = load %innerAddr2 : $*Builtin.Int64
643+
%intv = struct $Int64(%zero : $Builtin.Int64)
644+
store %intv to %outerAddr2 : $*Int64
645+
store %val to %innerAddr1 : $*Builtin.Int64
646+
cond_br undef, bb2, bb3
647+
648+
bb2:
649+
br bb1
650+
651+
bb3:
652+
dealloc_stack %stackAddr : $*Index
653+
%final = load %innerAddr2 : $*Builtin.Int64
654+
%result = struct $Int64(%final : $Builtin.Int64)
655+
return %result : $Int64
656+
}
657+
658+
// -----------------------------------------------------------------------------
659+
// Test combined load/store hoisting/sinking with unknown aliasing loads
660+
661+
// CHECK-LABEL: sil shared @testCombinedLdStUnknownLoad : $@convention(method) (Int64, Builtin.RawPointer, Builtin.RawPointer) -> Int64 {
662+
// CHECK: bb0(%0 : $Int64, %1 : $Builtin.RawPointer, %2 : $Builtin.RawPointer):
663+
// CHECK-NOT: {{(load|store)}}
664+
// CHECK: bb1:
665+
// CHECK-NEXT: load %{{.*}} : $*Builtin.Int64
666+
// CHECK-NEXT: store %{{.*}} to %{{.*}} : $*Int64
667+
// CHECK-NEXT: load %{{.*}} : $*Builtin.Int64
668+
// CHECK-NEXT: cond_br
669+
// CHECK-NOT: {{(load|store)}}
670+
// CHECK-LABEL: } // end sil function 'testCombinedLdStUnknownLoad'
671+
sil shared @testCombinedLdStUnknownLoad : $@convention(method) (Int64, Builtin.RawPointer, Builtin.RawPointer) -> Int64 {
672+
bb0(%0 : $Int64, %1 : $Builtin.RawPointer, %2 : $Builtin.RawPointer):
673+
%addr1 = pointer_to_address %1 : $Builtin.RawPointer to $*Index
674+
%addr2 = pointer_to_address %2 : $Builtin.RawPointer to $*Index
675+
%outerAddr1 = struct_element_addr %addr1 : $*Index, #Index.value
676+
%outerAddr2 = struct_element_addr %addr2 : $*Index, #Index.value
677+
%innerAddr1 = struct_element_addr %outerAddr1 : $*Int64, #Int64._value
678+
%innerAddr2 = struct_element_addr %outerAddr2 : $*Int64, #Int64._value
679+
br bb1
680+
681+
bb1:
682+
%val = load %innerAddr2 : $*Builtin.Int64
683+
store %0 to %outerAddr2 : $*Int64
684+
%val2 = load %innerAddr1 : $*Builtin.Int64
685+
cond_br undef, bb2, bb3
686+
687+
bb2:
688+
br bb1
689+
690+
bb3:
691+
%result = struct $Int64(%val2 : $Builtin.Int64)
692+
return %result : $Int64
693+
}
694+
695+
// -----------------------------------------------------------------------------
696+
// Reduced test case from rdar !!!
697+
//
698+
// Test miscompilation of BidirectionalCollection<IndexSet>._distance with
699+
// combined load/store hoisting/sinking with mutiple loads from
700+
// aliasing addresses.
701+
702+
// getRange
703+
sil @getRange : $@convention(thin) () -> Range<Int64>
704+
705+
// CHECK-LABEL: sil shared @testLICMReducedCombinedLdStExtraProjection : $@convention(method) (Int64) -> Int64 {
706+
// CHECK: bb0(%0 : $Int64):
707+
// CHECK: store %0 to %{{.*}} : $*Int64
708+
// CHECK-NOT: {{(load|store)}}
709+
// CHECK: bb1(%{{.*}} : $Builtin.Int64):
710+
// CHECK: builtin "sadd_with_overflow_Int64"
711+
// CHECK: load %{{.*}} : $*Builtin.Int64
712+
// CHECK: builtin "sadd_with_overflow_Int64"
713+
// CHECK: builtin "cmp_eq_Int64"
714+
// CHECK-NEXT: cond_br
715+
// CHECK: bb3:
716+
// CHECK: store %{{.*}} to %{{.*}} : $*Int64
717+
// CHECK: bb4:
718+
// CHECK: store %{{.*}} to %{{.*}} : $*Int64
719+
// CHECK: bb5:
720+
// CHECK: function_ref @getRange : $@convention(thin) () -> Range<Int64>
721+
// CHECK: apply %{{.*}}() : $@convention(thin) () -> Range<Int64>
722+
// CHECK: store %{{.*}} to %{{.*}} : $*Int64
723+
// CHECK: bb6:
724+
// CHECK: load %{{.*}} : $*Builtin.Int64
725+
// CHECK: builtin "cmp_eq_Int64"
726+
// CHECK: cond_br
727+
// CHECK-NOT: {{(load|store)}}
728+
// CHECK-LABEL: } // end sil function 'testLICMReducedCombinedLdStExtraProjection'
729+
sil shared @testLICMReducedCombinedLdStExtraProjection : $@convention(method) (Int64) -> Int64 {
730+
// %0 // users: %5, %1
731+
bb0(%0 : $Int64):
732+
%1 = struct_extract %0 : $Int64, #Int64._value // users: %35, %20
733+
%2 = integer_literal $Builtin.Int64, 0 // user: %9
734+
%3 = alloc_stack $Index // users: %41, %13, %4
735+
%4 = struct_element_addr %3 : $*Index, #Index.value // users: %8, %5
736+
store %0 to %4 : $*Int64 // id: %5
737+
%6 = integer_literal $Builtin.Int64, 1 // user: %11
738+
%7 = integer_literal $Builtin.Int1, -1 // user: %11
739+
%8 = struct_element_addr %4 : $*Int64, #Int64._value // user: %34
740+
br bb1(%2 : $Builtin.Int64) // id: %9
741+
742+
// %10 // user: %11
743+
bb1(%10 : $Builtin.Int64): // Preds: bb8 bb0
744+
%11 = builtin "sadd_with_overflow_Int64"(%10 : $Builtin.Int64, %6 : $Builtin.Int64, %7 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1) // user: %12
745+
%12 = tuple_extract %11 : $(Builtin.Int64, Builtin.Int1), 0 // users: %38, %37
746+
%13 = struct_element_addr %3 : $*Index, #Index.value // users: %32, %27, %24, %14
747+
%14 = struct_element_addr %13 : $*Int64, #Int64._value // user: %15
748+
%15 = load %14 : $*Builtin.Int64 // user: %18
749+
%16 = integer_literal $Builtin.Int64, 1 // user: %18
750+
%17 = integer_literal $Builtin.Int1, -1 // user: %18
751+
%18 = builtin "sadd_with_overflow_Int64"(%15 : $Builtin.Int64, %16 : $Builtin.Int64, %17 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1) // user: %19
752+
%19 = tuple_extract %18 : $(Builtin.Int64, Builtin.Int1), 0 // users: %26, %23, %20
753+
%20 = builtin "cmp_eq_Int64"(%19 : $Builtin.Int64, %1 : $Builtin.Int64) : $Builtin.Int1 // user: %21
754+
cond_br %20, bb2, bb3 // id: %21
755+
756+
bb2: // Preds: bb1
757+
cond_br undef, bb4, bb5 // id: %22
758+
759+
bb3: // Preds: bb1
760+
%23 = struct $Int64 (%19 : $Builtin.Int64) // user: %24
761+
store %23 to %13 : $*Int64 // id: %24
762+
br bb6 // id: %25
763+
764+
bb4: // Preds: bb2
765+
%26 = struct $Int64 (%19 : $Builtin.Int64) // user: %27
766+
store %26 to %13 : $*Int64 // id: %27
767+
br bb6 // id: %28
768+
769+
bb5: // Preds: bb2
770+
// function_ref getRange
771+
%29 = function_ref @getRange : $@convention(thin) () -> Range<Int64> // user: %30
772+
%30 = apply %29() : $@convention(thin) () -> Range<Int64> // user: %31
773+
%31 = struct_extract %30 : $Range<Int64>, #Range.lowerBound // user: %32
774+
store %31 to %13 : $*Int64 // id: %32
775+
br bb6 // id: %33
776+
777+
bb6: // Preds: bb5 bb4 bb3
778+
%34 = load %8 : $*Builtin.Int64 // user: %35
779+
%35 = builtin "cmp_eq_Int64"(%34 : $Builtin.Int64, %1 : $Builtin.Int64) : $Builtin.Int1 // user: %36
780+
cond_br %35, bb7, bb8 // id: %36
781+
782+
bb7: // Preds: bb6
783+
br bb9(%12 : $Builtin.Int64) // id: %37
784+
785+
bb8: // Preds: bb6
786+
br bb1(%12 : $Builtin.Int64) // id: %38
787+
788+
// %39 // user: %40
789+
bb9(%39 : $Builtin.Int64): // Preds: bb7
790+
%40 = struct $Int64 (%39 : $Builtin.Int64) // user: %42
791+
dealloc_stack %3 : $*Index // id: %41
792+
return %40 : $Int64 // id: %42
793+
}

test/SILOptimizer/licm_multiend.sil

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,16 @@ sil_global @_swiftEmptyArrayStorage : $_SwiftEmptyArrayStorage
3030
// CHECK: [[LOOPH]]({{.*}} : $Builtin.Int64)
3131
// CHECK: cond_br {{.*}}, [[LOOPCOND1:bb[0-9]+]], [[LOOPCOND2:bb[0-9]+]]
3232
// CHECK: [[LOOPCOND1]]:
33+
// CHECK-NEXT: store
3334
// CHECK-NEXT: cond_br {{.*}}, [[LOOPEXIT1:bb[0-9]+]], [[LOOPCONT1:bb[0-9]+]]
3435
// CHECK: [[LOOPEXIT1]]:
35-
// CHECK-NEXT: store
3636
// CHECK-NEXT: end_access [[BEGINA]] : $*Int
3737
// CHECK-NEXT: br [[LOOPAFTEREXIT:bb[0-9]+]]
3838
// CHECK: [[LOOPCOND2]]:
3939
// CHECK-NEXT: struct $Int
40+
// CHECK-NEXT: store
4041
// CHECK-NEXT: cond_br {{.*}}, [[LOOPEXIT2:bb[0-9]+]], [[LOOPCONT1]]
4142
// CHECK: [[LOOPEXIT2]]:
42-
// CHECK-NEXT: store
4343
// CHECK-NEXT: end_access [[BEGINA]] : $*Int
4444
// CHECK-NEXT: br [[LOOPAFTEREXIT]]
4545
// CHECK: [[LOOPCONT1]]:

0 commit comments

Comments
 (0)