Skip to content

Commit b097df3

Browse files
artagnonLukacma
authored andcommitted
[VPlan] Introduce cannotHoistOrSinkRecipe, fix miscompile (llvm#162674)
Factor out common code to determine legality of hoisting and sinking. The patch has the side-effect of fixing an underlying bug, where a load/store pair is reordered.
1 parent e225740 commit b097df3

File tree

3 files changed

+102
-27
lines changed

3 files changed

+102
-27
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
7070
return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
7171
case VPCanonicalIVPHISC:
7272
case VPBranchOnMaskSC:
73+
case VPDerivedIVSC:
7374
case VPFirstOrderRecurrencePHISC:
7475
case VPReductionPHISC:
7576
case VPScalarIVStepsSC:
@@ -86,6 +87,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
8687
case VPWidenLoadEVLSC:
8788
case VPWidenLoadSC:
8889
case VPWidenPHISC:
90+
case VPWidenPointerInductionSC:
8991
case VPWidenSC:
9092
case VPWidenSelectSC: {
9193
const Instruction *I =
@@ -119,6 +121,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
119121
case VPWidenIntrinsicSC:
120122
return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
121123
case VPBranchOnMaskSC:
124+
case VPDerivedIVSC:
122125
case VPFirstOrderRecurrencePHISC:
123126
case VPPredInstPHISC:
124127
case VPScalarIVStepsSC:
@@ -134,6 +137,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
134137
case VPWidenGEPSC:
135138
case VPWidenIntOrFpInductionSC:
136139
case VPWidenPHISC:
140+
case VPWidenPointerInductionSC:
137141
case VPWidenSC:
138142
case VPWidenSelectSC: {
139143
const Instruction *I =

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,24 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
131131
return true;
132132
}
133133

134+
/// Return true if we do not know how to (mechanically) hoist or sink \p R out
135+
/// of a loop region.
136+
static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
137+
// Assumes don't alias anything or throw; as long as they're guaranteed to
138+
// execute, they're safe to hoist.
139+
if (match(&R, m_Intrinsic<Intrinsic::assume>()))
140+
return false;
141+
142+
// TODO: Relax checks in the future, e.g. we could also hoist reads, if their
143+
// memory location is not modified in the vector loop.
144+
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
145+
return true;
146+
147+
// Allocas cannot be hoisted.
148+
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
149+
return RepR && RepR->getOpcode() == Instruction::Alloca;
150+
}
151+
134152
static bool sinkScalarOperands(VPlan &Plan) {
135153
auto Iter = vp_depth_first_deep(Plan.getEntry());
136154
bool Changed = false;
@@ -1826,7 +1844,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
18261844
VPDT.properlyDominates(Previous, SinkCandidate))
18271845
return true;
18281846

1829-
if (SinkCandidate->mayHaveSideEffects())
1847+
if (cannotHoistOrSinkRecipe(*SinkCandidate))
18301848
return false;
18311849

18321850
WorkList.push_back(SinkCandidate);
@@ -1866,7 +1884,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
18661884
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
18671885
VPRecipeBase *Previous,
18681886
VPDominatorTree &VPDT) {
1869-
if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory())
1887+
if (cannotHoistOrSinkRecipe(*Previous))
18701888
return false;
18711889

18721890
// Collect recipes that need hoisting.
@@ -1913,11 +1931,6 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
19131931
return nullptr;
19141932
return HoistCandidate;
19151933
};
1916-
auto CanHoist = [&](VPRecipeBase *HoistCandidate) {
1917-
// Avoid hoisting candidates with side-effects, as we do not yet analyze
1918-
// associated dependencies.
1919-
return !HoistCandidate->mayHaveSideEffects();
1920-
};
19211934

19221935
if (!NeedsHoisting(Previous->getVPSingleValue()))
19231936
return true;
@@ -1929,7 +1942,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
19291942
VPRecipeBase *Current = HoistCandidates[I];
19301943
assert(Current->getNumDefinedValues() == 1 &&
19311944
"only recipes with a single defined value expected");
1932-
if (!CanHoist(Current))
1945+
if (cannotHoistOrSinkRecipe(*Current))
19331946
return false;
19341947

19351948
for (VPValue *Op : Current->operands()) {
@@ -2144,24 +2157,6 @@ void VPlanTransforms::cse(VPlan &Plan) {
21442157
static void licm(VPlan &Plan) {
21452158
VPBasicBlock *Preheader = Plan.getVectorPreheader();
21462159

2147-
// Return true if we do not know how to (mechanically) hoist a given recipe
2148-
// out of a loop region.
2149-
auto CannotHoistRecipe = [](VPRecipeBase &R) {
2150-
// Assumes don't alias anything or throw; as long as they're guaranteed to
2151-
// execute, they're safe to hoist.
2152-
if (match(&R, m_Intrinsic<Intrinsic::assume>()))
2153-
return false;
2154-
2155-
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
2156-
// their memory location is not modified in the vector loop.
2157-
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
2158-
return true;
2159-
2160-
// Allocas cannot be hoisted.
2161-
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
2162-
return RepR && RepR->getOpcode() == Instruction::Alloca;
2163-
};
2164-
21652160
// Hoist any loop invariant recipes from the vector loop region to the
21662161
// preheader. Preform a shallow traversal of the vector loop region, to
21672162
// exclude recipes in replicate regions. Since the top-level blocks in the
@@ -2173,7 +2168,7 @@ static void licm(VPlan &Plan) {
21732168
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
21742169
vp_depth_first_shallow(LoopRegion->getEntry()))) {
21752170
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
2176-
if (CannotHoistRecipe(R))
2171+
if (cannotHoistOrSinkRecipe(R))
21772172
continue;
21782173
if (any_of(R.operands(), [](VPValue *Op) {
21792174
return !Op->isDefinedOutsideLoopRegions();

llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,3 +425,79 @@ loop:
425425
exit:
426426
ret void
427427
}
428+
429+
define void @hoist_previous_value_and_operand_load(ptr %dst) {
430+
; CHECK-LABEL: @hoist_previous_value_and_operand_load(
431+
; CHECK-NEXT: entry:
432+
; CHECK-NEXT: br label [[LOOP:%.*]]
433+
; CHECK: loop:
434+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
435+
; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[LOAD:%.*]], [[LOOP]] ]
436+
; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OR:%.*]], [[LOOP]] ]
437+
; CHECK-NEXT: [[OR]] = or i32 [[FOR_1]], 3
438+
; CHECK-NEXT: [[ADD]] = add i64 [[IV]], 1
439+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[IV]]
440+
; CHECK-NEXT: store i32 [[FOR_2]], ptr [[GEP]], align 4
441+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 337
442+
; CHECK-NEXT: [[LOAD]] = load i32, ptr [[DST]], align 4
443+
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
444+
; CHECK: exit:
445+
; CHECK-NEXT: ret void
446+
;
447+
entry:
448+
br label %loop
449+
450+
loop:
451+
%iv = phi i64 [ 1, %entry ], [ %add, %loop ]
452+
%for.1 = phi i32 [ 1, %entry ], [ %load, %loop ]
453+
%for.2 = phi i32 [ 0, %entry ], [ %or, %loop ]
454+
%or = or i32 %for.1, 3
455+
%add = add i64 %iv, 1
456+
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
457+
store i32 %for.2, ptr %gep
458+
%icmp = icmp ult i64 %iv, 337
459+
%load = load i32, ptr %dst
460+
br i1 %icmp, label %loop, label %exit
461+
462+
exit:
463+
ret void
464+
}
465+
466+
define void @hoist_previous_value_and_operand_assume(ptr %dst) {
467+
; CHECK-LABEL: @hoist_previous_value_and_operand_assume(
468+
; CHECK-NEXT: entry:
469+
; CHECK-NEXT: br label [[LOOP:%.*]]
470+
; CHECK: loop:
471+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
472+
; CHECK-NEXT: [[FOR_1:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[TRUNC:%.*]], [[LOOP]] ]
473+
; CHECK-NEXT: [[FOR_2:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[OR:%.*]], [[LOOP]] ]
474+
; CHECK-NEXT: [[OR]] = or i1 [[FOR_1]], true
475+
; CHECK-NEXT: [[ADD]] = add i64 [[IV]], 1
476+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[IV]]
477+
; CHECK-NEXT: store i1 [[FOR_2]], ptr [[GEP]], align 1
478+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 337
479+
; CHECK-NEXT: call void @llvm.assume(i1 [[FOR_1]])
480+
; CHECK-NEXT: [[TRUNC]] = trunc i64 [[IV]] to i1
481+
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
482+
; CHECK: exit:
483+
; CHECK-NEXT: ret void
484+
;
485+
entry:
486+
br label %loop
487+
488+
loop:
489+
%iv = phi i64 [ 1, %entry ], [ %add, %loop ]
490+
%for.1 = phi i1 [ 1, %entry ], [ %trunc, %loop ]
491+
%for.2 = phi i1 [ 0, %entry ], [ %or, %loop ]
492+
%or = or i1 %for.1, 3
493+
%add = add i64 %iv, 1
494+
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
495+
store i1 %for.2, ptr %gep
496+
%icmp = icmp ult i64 %iv, 337
497+
call void @llvm.assume(i1 %for.1)
498+
%trunc = trunc i64 %iv to i1
499+
br i1 %icmp, label %loop, label %exit
500+
501+
exit:
502+
ret void
503+
}

0 commit comments

Comments
 (0)