Skip to content

Commit 62cd5d5

Browse files
committed
[VPlan] Be more careful with CSE in replicate regions.
Recipes in replicate regions implicitly depend on the region's predicate. Limit CSE to recipes in the same block when either recipe is in a replicate region. This allows handling VPPredInstPHIRecipe during CSE. If we perform CSE on recipes inside a replicate region, we may end up with two VPPredInstPHIRecipes sharing the same operand. This is incompatible with the current VPPredInstPHIRecipe codegen, which re-sets the current value of its operand in VPTransformState, and can cause crashes, as demonstrated by the added test cases. Note that this patch only modifies ::isEqual to check for replicating regions, and not getHash, as CSE across replicating regions should be uncommon. Fixes #157314. Fixes #161974.
1 parent e9f3be6 commit 62cd5d5

File tree

2 files changed

+178
-0
lines changed

2 files changed

+178
-0
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1979,6 +1979,12 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
19791979
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
19801980
VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
19811981
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
1982+
.Case<VPPredInstPHIRecipe>([](auto *I) {
1983+
// Treat VPPredInstPHIRecipe as Instruction::PHI for CSE. This is only
1984+
// safe if they are in the same block and hence share the same
1985+
// predicate.
1986+
return std::make_pair(false, Instruction::PHI);
1987+
})
19821988
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
19831989
return std::make_pair(true, I->getVectorIntrinsicID());
19841990
})
@@ -2053,6 +2059,15 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
20532059
LFlags->getPredicate() !=
20542060
cast<VPRecipeWithIRFlags>(R)->getPredicate())
20552061
return false;
2062+
// Recipes in replicate regions implicitly depend on the region's predicate. If either
2063+
// recipe is in a replicate region, only consider them equal if both have
2064+
// the same parent.
2065+
const VPRegionBlock *RegionL = L->getParent()->getParent();
2066+
const VPRegionBlock *RegionR = R->getParent()->getParent();
2067+
if (((RegionL && RegionL->isReplicator()) ||
2068+
(RegionR && RegionR->isReplicator())) &&
2069+
L->getParent() != R->getParent())
2070+
return false;
20562071
const VPlan *Plan = L->getParent()->getPlan();
20572072
VPTypeAnalysis TypeInfo(*Plan);
20582073
return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+
; RUN: opt -p loop-vectorize -force-vector-width=2 -force-widen-divrem-via-safe-divisor=false -S %s | FileCheck %s
3+
4+
define void @multiple_vppredinstphi_with_same_predicate(ptr %A, i32 %d) {
5+
; CHECK-LABEL: define void @multiple_vppredinstphi_with_same_predicate(
6+
; CHECK-SAME: ptr [[A:%.*]], i32 [[D:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
9+
; CHECK: [[VECTOR_PH]]:
10+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
11+
; CHECK: [[VECTOR_BODY]]:
12+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE2:.*]] ]
13+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
14+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
15+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer
16+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
17+
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]]
18+
; CHECK: [[PRED_SDIV_IF]]:
19+
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i32 -10, [[D]]
20+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
21+
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]]
22+
; CHECK: [[PRED_SDIV_CONTINUE]]:
23+
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP4]], %[[PRED_SDIV_IF]] ]
24+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
25+
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2]]
26+
; CHECK: [[PRED_SDIV_IF1]]:
27+
; CHECK-NEXT: [[TMP7:%.*]] = sdiv i32 -10, [[D]]
28+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP7]], i32 1
29+
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]]
30+
; CHECK: [[PRED_SDIV_CONTINUE2]]:
31+
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ [[TMP5]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP8]], %[[PRED_SDIV_IF1]] ]
32+
; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP9]], [[TMP9]]
33+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP10]], <2 x i32> zeroinitializer
34+
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP0]], align 4
35+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
36+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
37+
; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
38+
; CHECK: [[MIDDLE_BLOCK]]:
39+
; CHECK-NEXT: br label %[[EXIT:.*]]
40+
; CHECK: [[EXIT]]:
41+
; CHECK-NEXT: ret void
42+
;
43+
entry:
44+
br label %loop.header
45+
46+
loop.header:
47+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
48+
%gep.A = getelementptr inbounds i32, ptr %A, i32 %iv
49+
%l = load i32, ptr %gep.A
50+
%c = icmp sgt i32 %l, 0
51+
br i1 %c, label %then, label %loop.latch
52+
53+
then:
54+
%div.0 = sdiv i32 -10, %d
55+
%div.1 = sdiv i32 -10, %d
56+
%add = add i32 %div.1, %div.0
57+
br label %loop.latch
58+
59+
loop.latch:
60+
%merge = phi i32 [ %add, %then ], [ 0, %loop.header ]
61+
store i32 %merge, ptr %gep.A
62+
%iv.next = add i32 %iv, 1
63+
%ec = icmp eq i32 %iv.next, 100
64+
br i1 %ec, label %exit, label %loop.header
65+
66+
exit:
67+
ret void
68+
}
69+
70+
define void @multiple_vppredinstphi_with_different_predicate(ptr %A, i32 %d) {
71+
; CHECK-LABEL: define void @multiple_vppredinstphi_with_different_predicate(
72+
; CHECK-SAME: ptr [[A:%.*]], i32 [[D:%.*]]) {
73+
; CHECK-NEXT: [[ENTRY:.*:]]
74+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
75+
; CHECK: [[VECTOR_PH]]:
76+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
77+
; CHECK: [[VECTOR_BODY]]:
78+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE6:.*]] ]
79+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
80+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
81+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer
82+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
83+
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]]
84+
; CHECK: [[PRED_SDIV_IF]]:
85+
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i32 -10, [[D]]
86+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
87+
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]]
88+
; CHECK: [[PRED_SDIV_CONTINUE]]:
89+
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP4]], %[[PRED_SDIV_IF]] ]
90+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
91+
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]]
92+
; CHECK: [[PRED_SDIV_IF1]]:
93+
; CHECK-NEXT: [[TMP7:%.*]] = sdiv i32 -10, [[D]]
94+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP7]], i32 1
95+
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]]
96+
; CHECK: [[PRED_SDIV_CONTINUE2]]:
97+
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ [[TMP5]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP8]], %[[PRED_SDIV_IF1]] ]
98+
; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true)
99+
; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i1> [[TMP1]], [[TMP10]]
100+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP9]], <2 x i32> zeroinitializer
101+
; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 20)
102+
; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP11]], <2 x i1> [[TMP12]], <2 x i1> zeroinitializer
103+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
104+
; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF3:.*]], label %[[PRED_SDIV_CONTINUE4:.*]]
105+
; CHECK: [[PRED_SDIV_IF3]]:
106+
; CHECK-NEXT: [[TMP15:%.*]] = sdiv i32 -10, [[D]]
107+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> poison, i32 [[TMP15]], i32 0
108+
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE4]]
109+
; CHECK: [[PRED_SDIV_CONTINUE4]]:
110+
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ poison, %[[PRED_SDIV_CONTINUE2]] ], [ [[TMP16]], %[[PRED_SDIV_IF3]] ]
111+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
112+
; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_SDIV_IF5:.*]], label %[[PRED_SDIV_CONTINUE6]]
113+
; CHECK: [[PRED_SDIV_IF5]]:
114+
; CHECK-NEXT: [[TMP19:%.*]] = sdiv i32 -10, [[D]]
115+
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP19]], i32 1
116+
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE6]]
117+
; CHECK: [[PRED_SDIV_CONTINUE6]]:
118+
; CHECK-NEXT: [[TMP21:%.*]] = phi <2 x i32> [ [[TMP17]], %[[PRED_SDIV_CONTINUE4]] ], [ [[TMP20]], %[[PRED_SDIV_IF5]] ]
119+
; CHECK-NEXT: [[PREDPHI7:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP21]], <2 x i32> zeroinitializer
120+
; CHECK-NEXT: [[TMP22:%.*]] = add <2 x i32> [[PREDPHI]], [[PREDPHI7]]
121+
; CHECK-NEXT: store <2 x i32> [[TMP22]], ptr [[TMP0]], align 4
122+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
123+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
124+
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
125+
; CHECK: [[MIDDLE_BLOCK]]:
126+
; CHECK-NEXT: br label %[[EXIT:.*]]
127+
; CHECK: [[EXIT]]:
128+
; CHECK-NEXT: ret void
129+
;
130+
entry:
131+
br label %loop.header
132+
133+
loop.header:
134+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
135+
%gep.A = getelementptr inbounds i32, ptr %A, i32 %iv
136+
%l = load i32, ptr %gep.A
137+
%c.0 = icmp sgt i32 %l, 0
138+
br i1 %c.0, label %then.0, label %continue
139+
140+
then.0:
141+
%div.0 = sdiv i32 -10, %d
142+
br label %continue
143+
144+
continue:
145+
%merge.0 = phi i32 [ %div.0, %then.0 ], [ 0, %loop.header ]
146+
%c.1 = icmp sgt i32 %l, 20
147+
br i1 %c.1, label %then.1, label %loop.latch
148+
149+
then.1:
150+
%div.1 = sdiv i32 -10, %d
151+
br label %loop.latch
152+
153+
loop.latch:
154+
%merge.1 = phi i32 [ %div.1, %then.1 ], [ 0, %continue ]
155+
%add = add i32 %merge.0, %merge.1
156+
store i32 %add, ptr %gep.A
157+
%iv.next = add i32 %iv, 1
158+
%ec = icmp eq i32 %iv.next, 100
159+
br i1 %ec, label %exit, label %loop.header
160+
161+
exit:
162+
ret void
163+
}

0 commit comments

Comments
 (0)