Skip to content

Commit 41a2dfc

Browse files
committed
[VPlan] Allow multiple users of (broadcast %evl).
CSE may replace multiple redundant broadcasts of EVL with a single broadcast, which may then have more than one user. Adjust the verifier to allow this. Fixes a crash when building llvm-test-suite with EVL: https://lab.llvm.org/buildbot/#/builders/210/builds/3303
1 parent 08c057e commit 41a2dfc

File tree

2 files changed

+94
-1
lines changed

2 files changed

+94
-1
lines changed

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
199199
// EVLIVIncrement is only used by EVLIV & BranchOnCount.
200200
// Having more than two users is unexpected.
201201
using namespace llvm::VPlanPatternMatch;
202-
if ((I->getNumUsers() != 1) &&
202+
if (I->getOpcode() != VPInstruction::Broadcast &&
203+
I->getNumUsers() != 1 &&
203204
(I->getNumUsers() != 2 ||
204205
none_of(I->users(), match_fn(m_BranchOnCount(m_Specific(I),
205206
m_VPValue()))))) {

llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,97 @@ exit:
192192
ret void
193193
}
194194

195+
define void @redundant_iv_trunc_for_cse(ptr noalias %src, ptr noalias %dst, i64 %n) #0 {
196+
; CHECK-LABEL: define void @redundant_iv_trunc_for_cse(
197+
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
198+
; CHECK-NEXT: [[ENTRY:.*:]]
199+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
200+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
201+
; CHECK: [[VECTOR_PH]]:
202+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
203+
; CHECK-NEXT: [[TMP2:%.*]] = mul <vscale x 4 x i32> [[TMP1]], splat (i32 1)
204+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP2]]
205+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
206+
; CHECK: [[VECTOR_BODY]]:
207+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
208+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
209+
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[VECTOR_BODY]] ]
210+
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
211+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
212+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP3]], i64 0
213+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
214+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[EVL_BASED_IV]]
215+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP3]])
216+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer
217+
; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 4 x i32> [[VEC_IND1]], splat (i32 16)
218+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[VEC_IND]]
219+
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 4 x i32> [[PREDPHI]] to <vscale x 4 x i8>
220+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[EVL_BASED_IV]]
221+
; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP3]])
222+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
223+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
224+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
225+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
226+
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <vscale x 4 x i32> [[VEC_IND1]], [[BROADCAST_SPLAT]]
227+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
228+
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
229+
; CHECK: [[MIDDLE_BLOCK]]:
230+
; CHECK-NEXT: br label %[[EXIT:.*]]
231+
; CHECK: [[SCALAR_PH:.*]]:
232+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
233+
; CHECK: [[LOOP_HEADER]]:
234+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
235+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
236+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
237+
; CHECK-NEXT: [[C_0:%.*]] = icmp eq i32 [[L]], 0
238+
; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32
239+
; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
240+
; CHECK: [[THEN]]:
241+
; CHECK-NEXT: [[TRUNC_IV_2:%.*]] = trunc i64 [[IV]] to i32
242+
; CHECK-NEXT: [[SHL_IV:%.*]] = shl i32 [[TRUNC_IV_2]], 16
243+
; CHECK-NEXT: br label %[[LOOP_LATCH]]
244+
; CHECK: [[LOOP_LATCH]]:
245+
; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[SHL_IV]], %[[THEN]] ], [ [[TRUNC_IV]], %[[LOOP_HEADER]] ]
246+
; CHECK-NEXT: [[TRUNC_P:%.*]] = trunc i32 [[P]] to i8
247+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]]
248+
; CHECK-NEXT: store i8 [[TRUNC_P]], ptr [[GEP_DST]], align 1
249+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
250+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
251+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
252+
; CHECK: [[EXIT]]:
253+
; CHECK-NEXT: ret void
254+
;
255+
entry:
256+
br label %loop.header
257+
258+
loop.header:
259+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
260+
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
261+
%l = load i32, ptr %gep.src
262+
%c.0 = icmp eq i32 %l, 0
263+
%trunc.iv = trunc i64 %iv to i32
264+
br i1 %c.0, label %then, label %loop.latch
265+
266+
then:
267+
%trunc.iv.2 = trunc i64 %iv to i32
268+
%shl.iv = shl i32 %trunc.iv.2, 16
269+
br label %loop.latch
270+
271+
loop.latch:
272+
%p = phi i32 [ %shl.iv, %then ], [ %trunc.iv, %loop.header ]
273+
%trunc.p = trunc i32 %p to i8
274+
%gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv
275+
store i8 %trunc.p, ptr %gep.dst, align 1
276+
%iv.next = add i64 %iv, 1
277+
%ec = icmp eq i64 %iv, %n
278+
br i1 %ec, label %exit, label %loop.header
279+
280+
exit:
281+
ret void
282+
}
283+
284+
285+
195286
attributes #0 = { "target-features"="+64bit,+v,+zvl256b" }
196287
attributes #1 = { "target-cpu"="sifive-p670" }
197288
;.
@@ -206,4 +297,5 @@ attributes #1 = { "target-cpu"="sifive-p670" }
206297
; CHECK: [[META8]] = !{!"llvm.loop.unroll.runtime.disable"}
207298
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]]}
208299
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META7]], [[META8]]}
300+
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META8]]}
209301
;.

0 commit comments

Comments
 (0)