Skip to content

Commit c61a481

Browse files
authored
[VPlan] Use SCEV to prove non-aliasing for stores at different offsets. (#170347)
Extend the logic added in llvm/llvm-project#168771 to also allow sinking stores past stores in the same noalias set by checking if we can prove no-alias via the distance between accesses, checked via SCEV. PR: llvm/llvm-project#170347
1 parent 1a66474 commit c61a481

File tree

2 files changed

+99
-45
lines changed

2 files changed

+99
-45
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 81 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444

4545
using namespace llvm;
4646
using namespace VPlanPatternMatch;
47+
using namespace SCEVPatternMatch;
4748

4849
bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
4950
VPlan &Plan,
@@ -139,14 +140,77 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
139140
return true;
140141
}
141142

142-
// Check if a memory operation doesn't alias with memory operations in blocks
143-
// between FirstBB and LastBB using scoped noalias metadata.
144-
// For load hoisting, we only check writes in one direction.
145-
// For store sinking, we check both reads and writes bidirectionally.
146-
static bool canHoistOrSinkWithNoAliasCheck(
147-
const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
148-
bool CheckReads,
149-
const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) {
143+
/// Helper for extra no-alias checks used when sinking stores. It bundles a set
/// of recipes that are known to be safe to skip (\p ExcludeRecipes) with the
/// state needed to prove, via SCEV, that another store does not overlap the
/// group leader (\p GroupLeader).
144+
class SinkStoreInfo {
145+
const SmallPtrSetImpl<VPRecipeBase *> &ExcludeRecipes;
146+
VPReplicateRecipe &GroupLeader;
147+
ScalarEvolution &SE;
148+
const Loop &L;
149+
VPTypeAnalysis &TypeInfo;
150+
151+
// Return true if \p A and \p B are known to not alias for all VFs in the
152+
// plan, checked via the distance between the accesses.
// Conservatively returns false whenever the proof cannot be made: either
// recipe is not a store, an address has no computable SCEV, or the address
// difference does not match a constant.
153+
bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B) const {
154+
// Only store/store pairs are handled.
if (A->getOpcode() != Instruction::Store ||
155+
B->getOpcode() != Instruction::Store)
156+
return false;
157+
158+
// Operand 1 is used as the store's address; operand 0 (below) is used as
// the stored value whose type determines the store size.
VPValue *AddrA = A->getOperand(1);
159+
const SCEV *SCEVA = vputils::getSCEVExprForVPValue(AddrA, SE, &L);
160+
VPValue *AddrB = B->getOperand(1);
161+
const SCEV *SCEVB = vputils::getSCEVExprForVPValue(AddrB, SE, &L);
162+
if (isa<SCEVCouldNotCompute>(SCEVA) || isa<SCEVCouldNotCompute>(SCEVB))
163+
return false;
164+
165+
// The difference between the two address expressions must match
// m_scev_APInt, which binds it to Distance.
const APInt *Distance;
166+
if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance)))
167+
return false;
168+
169+
const DataLayout &DL = SE.getDataLayout();
170+
Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
171+
uint64_t SizeA = DL.getTypeStoreSize(TyA);
172+
Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
173+
uint64_t SizeB = DL.getTypeStoreSize(TyB);
174+
175+
// Use the maximum store size to ensure no overlap from either direction.
176+
// Currently only handles fixed sizes, as it is only used for
177+
// replicating VPReplicateRecipes.
178+
uint64_t MaxStoreSize = std::max(SizeA, SizeB);
179+
180+
// Compare against the largest VF of the plan that contains B: the accesses
// are treated as non-aliasing only if |Distance| >= MaxVF * MaxStoreSize.
// NOTE(review): getFixedValue() presumably requires a non-scalable MaxVF;
// confirm this is never reached for plans with scalable VFs.
auto VFs = B->getParent()->getPlan()->vectorFactors();
181+
ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT);
182+
return Distance->abs().uge(
183+
MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue());
184+
}
185+
186+
public:
187+
// Stores references to all arguments; no copies are made, so the referenced
// objects must outlive this helper.
SinkStoreInfo(const SmallPtrSetImpl<VPRecipeBase *> &ExcludeRecipes,
188+
VPReplicateRecipe &GroupLeader, ScalarEvolution &SE,
189+
const Loop &L, VPTypeAnalysis &TypeInfo)
190+
: ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), SE(SE), L(L),
191+
TypeInfo(TypeInfo) {}
192+
193+
/// Return true if \p R should be skipped during alias checking, either
194+
/// because it's in the exclude set or because no-alias can be proven via
195+
/// SCEV.
/// The SCEV check is only attempted when \p R is a VPReplicateRecipe, and it
/// compares \p R against the group leader.
196+
bool shouldSkip(VPRecipeBase &R) const {
197+
auto *Store = dyn_cast<VPReplicateRecipe>(&R);
198+
return ExcludeRecipes.contains(&R) ||
199+
(Store && isNoAliasViaDistance(Store, &GroupLeader));
200+
}
201+
};
202+
203+
/// Check if a memory operation doesn't alias with memory operations in blocks
204+
/// between \p FirstBB and \p LastBB using scoped noalias metadata. If
205+
/// \p SinkInfo is std::nullopt, only recipes that may write to memory are
206+
/// checked (for load hoisting). Otherwise recipes that may read or write
207+
/// memory are checked, and SCEV is used to prove no-alias between the group
208+
/// leader and other replicate recipes (for store sinking).
209+
static bool
210+
canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc,
211+
VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
212+
std::optional<SinkStoreInfo> SinkInfo = {}) {
213+
bool CheckReads = SinkInfo.has_value();
150214
if (!MemLoc.AATags.Scope)
151215
return false;
152216

@@ -158,7 +222,7 @@ static bool canHoistOrSinkWithNoAliasCheck(
158222
"Expected at most one successor in block chain");
159223
auto *VPBB = cast<VPBasicBlock>(Block);
160224
for (VPRecipeBase &R : *VPBB) {
161-
if (ExcludeRecipes && ExcludeRecipes->contains(&R))
225+
if (SinkInfo && SinkInfo->shouldSkip(R))
162226
continue;
163227

164228
// Skip recipes that don't need checking.
@@ -4273,8 +4337,7 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
42734337

42744338
// Check that the load doesn't alias with stores between first and last.
42754339
auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
4276-
if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB,
4277-
/*CheckReads=*/false))
4340+
if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB))
42784341
continue;
42794342

42804343
// Collect common metadata from all loads in the group.
@@ -4301,7 +4364,9 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
43014364
}
43024365

43034366
static bool
4304-
canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
4367+
canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink,
4368+
ScalarEvolution &SE, const Loop &L,
4369+
VPTypeAnalysis &TypeInfo) {
43054370
auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front());
43064371
if (!StoreLoc || !StoreLoc->AATags.Scope)
43074372
return false;
@@ -4313,8 +4378,8 @@ canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
43134378

43144379
VPBasicBlock *FirstBB = StoresToSink.front()->getParent();
43154380
VPBasicBlock *LastBB = StoresToSink.back()->getParent();
4316-
return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB,
4317-
/*CheckReads=*/true, &StoresToSinkSet);
4381+
SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo);
4382+
return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, SinkInfo);
43184383
}
43194384

43204385
void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
@@ -4325,13 +4390,14 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
43254390
return;
43264391

43274392
VPDominatorTree VPDT(Plan);
4393+
VPTypeAnalysis TypeInfo(Plan);
43284394

43294395
for (auto &Group : Groups) {
43304396
sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
43314397
return VPDT.properlyDominates(A, B);
43324398
});
43334399

4334-
if (!canSinkStoreWithNoAliasCheck(Group))
4400+
if (!canSinkStoreWithNoAliasCheck(Group, SE, *L, TypeInfo))
43354401
continue;
43364402

43374403
// Use the last (most dominated) store's location for the unconditional

llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll

Lines changed: 18 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
764764
; CHECK: [[VECTOR_PH]]:
765765
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
766766
; CHECK: [[VECTOR_BODY]]:
767-
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
767+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
768768
; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16
769769
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
770770
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16
@@ -781,42 +781,30 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
781781
; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP22]], align 8, !alias.scope [[META78]]
782782
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0
783783
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1
784-
; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true)
785784
; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00)
786-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0
787-
; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
788-
; CHECK: [[PRED_STORE_IF]]:
789785
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
790-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0
791-
; CHECK-NEXT: store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
786+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
787+
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
788+
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1
789+
; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[WIDE_LOAD]], <2 x double> [[TMP34]]
790+
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP20]], i32 0
791+
; CHECK-NEXT: store double [[TMP32]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
792+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP20]], i32 1
793+
; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]]
794+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
795+
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
796+
; CHECK: [[PRED_STORE_IF]]:
797+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP18]], i64 16
798+
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP24]], align 8, !alias.scope [[META81]], !noalias [[META78]]
792799
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
793800
; CHECK: [[PRED_STORE_CONTINUE]]:
794-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1
795-
; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
801+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
802+
; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
796803
; CHECK: [[PRED_STORE_IF2]]:
797-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
798-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP34]], i32 1
799-
; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]]
804+
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP21]], i64 16
805+
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP35]], align 8, !alias.scope [[META81]], !noalias [[META78]]
800806
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
801807
; CHECK: [[PRED_STORE_CONTINUE3]]:
802-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
803-
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
804-
; CHECK: [[PRED_STORE_IF4]]:
805-
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
806-
; CHECK-NEXT: store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]]
807-
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16
808-
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]]
809-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
810-
; CHECK: [[PRED_STORE_CONTINUE5]]:
811-
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
812-
; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
813-
; CHECK: [[PRED_STORE_IF6]]:
814-
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
815-
; CHECK-NEXT: store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]]
816-
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
817-
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]]
818-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
819-
; CHECK: [[PRED_STORE_CONTINUE7]]:
820808
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
821809
; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
822810
; CHECK-NEXT: br i1 [[TMP52]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP83:![0-9]+]]

0 commit comments

Comments
 (0)