
Commit 20c52e4

Reapply "[RISCV][LoopVectorize] Use DataWithEVL as the preferred tail folding style (#148686)"
This reverts commit 25e97fc. The original commit was reverted due to a crash in llvm-test-suite. The crash stemmed from a multiply reduction, which isn't supported for scalable VFs on RISC-V.

But for EVL tail folding we only support scalable VFs, so when -force-tail-folding-style=data-with-evl is specified we check whether there's a scalable VF, and fall back to data-without-lane-mask if there isn't. This is done in setTailFoldingStyles, but previously we were only checking whether the forced tail folding style was legal, not the style returned by TTI.

This version fixes this by checking the actual computed tail folding style, not just the forced one, and adds a test for the crash in llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll.
1 parent: 4b99eb2
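For readers unfamiliar with setTailFoldingStyles, the sketch below illustrates the before/after difference described in the commit message. It is a minimal standalone sketch, not the LLVM sources: the names loosely mirror LoopVectorize.cpp, and the scalable-VF legality question is reduced to a single boolean.

// Standalone illustration of the fixed check; simplified, not LLVM code.
#include <cassert>
#include <optional>
#include <utility>

enum class TailFoldingStyle { None, Data, DataWithoutLaneMask, DataWithEVL };

struct CostModelSketch {
  // Pair of styles: {IV update may overflow, IV update cannot overflow}.
  std::optional<std::pair<TailFoldingStyle, TailFoldingStyle>> Chosen;

  void setTailFoldingStyles(std::optional<TailFoldingStyle> Forced,
                            TailFoldingStyle TTIPreferred, bool HasScalableVF) {
    // The chosen styles come either from -force-tail-folding-style or from TTI.
    Chosen = Forced ? std::make_pair(*Forced, *Forced)
                    : std::make_pair(TTIPreferred, TTIPreferred);

    // Buggy check (pre-fix): only the forced flag was inspected, so a
    // TTI-preferred DataWithEVL was never validated:
    //   if (Forced != TailFoldingStyle::DataWithEVL) return;
    // Fixed check: inspect the styles that were actually chosen.
    if (Chosen->first != TailFoldingStyle::DataWithEVL &&
        Chosen->second != TailFoldingStyle::DataWithEVL)
      return;

    // EVL tail folding only supports scalable VFs; otherwise fall back.
    if (!HasScalableVF)
      Chosen = std::make_pair(TailFoldingStyle::DataWithoutLaneMask,
                              TailFoldingStyle::DataWithoutLaneMask);
  }
};

int main() {
  CostModelSketch CM;
  // No -force flag, TTI now prefers DataWithEVL, but only fixed VFs are legal
  // (e.g. the multiply reduction in the new test): the fixed check falls back.
  CM.setTailFoldingStyles(std::nullopt, TailFoldingStyle::DataWithEVL,
                          /*HasScalableVF=*/false);
  assert(CM.Chosen->first == TailFoldingStyle::DataWithoutLaneMask);
  return 0;
}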

File tree: 7 files changed (+273, -444 lines)

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Lines changed: 2 additions & 2 deletions

@@ -116,8 +116,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
   }
   TailFoldingStyle
   getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
-    return ST->hasVInstructions() ? TailFoldingStyle::Data
-                                  : TailFoldingStyle::DataWithoutLaneMask;
+    return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
+                                  : TailFoldingStyle::None;
   }
   std::optional<unsigned> getMaxVScale() const override;
   std::optional<unsigned> getVScaleForTuning() const override;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Lines changed: 3 additions & 2 deletions

@@ -1354,9 +1354,10 @@ class LoopVectorizationCostModel {
     ChosenTailFoldingStyle = {ForceTailFoldingStyle.getValue(),
                               ForceTailFoldingStyle.getValue()};

-    if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL)
+    if (ChosenTailFoldingStyle->first != TailFoldingStyle::DataWithEVL &&
+        ChosenTailFoldingStyle->second != TailFoldingStyle::DataWithEVL)
       return;
-    // Override forced styles if needed.
+    // Override EVL styles if needed.
     // FIXME: Investigate opportunity for fixed vector factor.
     bool EVLIsLegal = UserIC <= 1 && IsScalableVF &&
                       TTI.hasActiveVectorLength() && !EnableVPlanNativePath;

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
Lines changed: 65 additions & 4 deletions

@@ -133,15 +133,15 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 8, i32 4, i1 true)
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8:%.*]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11:%.*]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
+; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr align 1 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_END:%.*]]

@@ -358,3 +358,64 @@ for.end: ; preds = %for.body
 
 attributes #0 = { "target-features"="+v,+d" vscale_range(2, 1024) }
 
+; This is a non-power-of-2 low trip count, so we will try to tail-fold this. But
+; the reduction is a multiply which is only legal for fixed-length VFs. But
+; fixed-length VFs aren't legal for the default tail-folding style
+; data-with-evl, so make sure we gracefully fall back to data-without-lane-mask.
+
+define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) {
+; CHECK-LABEL: @mul_non_pow_2_low_trip_count(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = icmp ule <16 x i64> [[VEC_IV]], splat (i64 9)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
+; CHECK-NEXT: [[TMP2]] = mul <16 x i8> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[TMP2]], <16 x i8> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> [[TMP3]])
+; CHECK-NEXT: br label [[FOR_END:%.*]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 2, [[ENTRY]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[MUL]] = mul i8 [[TMP5]], [[RDX]]
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 10
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: for.end:
+; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i8 [ [[MUL]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i8 [[MUL_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %rdx = phi i8 [ 2, %entry ], [ %mul, %for.body ]
+  %gep = getelementptr i8, ptr %a, i64 %iv
+  %0 = load i8, ptr %gep
+  %mul = mul i8 %0, %rdx
+  %iv.next = add i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 10
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret i8 %mul
+}
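The comment on the new @mul_non_pow_2_low_trip_count test above summarises the fallback chain this commit exercises. The standalone sketch below models that chain under the assumptions stated in the commit message (multiply reductions are not supported for scalable VFs on RISC-V, and EVL tail folding requires a scalable VF); RecurKind-style names and the helper functions are illustrative only, not the LLVM API.

// Standalone model of the fallback described in the test comment; simplified.
#include <cassert>

enum class TailFoldingStyle { None, Data, DataWithoutLaneMask, DataWithEVL };
enum class ReductionKind { Add, Mul };

// Assumption from the commit message: a mul reduction forces fixed-length VFs.
bool hasLegalScalableVF(ReductionKind Reduction) {
  return Reduction != ReductionKind::Mul;
}

// EVL tail folding requires a scalable VF; otherwise degrade gracefully.
TailFoldingStyle effectiveStyle(TailFoldingStyle Preferred, bool ScalableVF) {
  if (Preferred == TailFoldingStyle::DataWithEVL && !ScalableVF)
    return TailFoldingStyle::DataWithoutLaneMask;
  return Preferred;
}

int main() {
  // The @mul_non_pow_2_low_trip_count case: trip count 10 gets tail-folded,
  // the mul reduction rules out scalable VFs, so the preferred data-with-evl
  // style must fall back to data-without-lane-mask instead of crashing.
  bool ScalableVF = hasLegalScalableVF(ReductionKind::Mul);
  assert(effectiveStyle(TailFoldingStyle::DataWithEVL, ScalableVF) ==
         TailFoldingStyle::DataWithoutLaneMask);
  return 0;
}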

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
Lines changed: 45 additions & 24 deletions

@@ -7,29 +7,49 @@ define void @test(ptr %p, i64 %a, i8 %b) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH1:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[A]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
-; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i64> [[TMP0]], splat (i64 52)
-; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 9, [[TMP2]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[B]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[A]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
+; CHECK-NEXT: [[TMP6:%.*]] = ashr <vscale x 2 x i64> [[TMP5]], splat (i64 52)
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i64> [[TMP6]] to <vscale x 2 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 2 x i8> [[BROADCAST_SPLAT]] to <vscale x 2 x i32>
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP10]]
 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
-; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
-; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_COND]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
+; CHECK-NEXT: [[AVL:%.*]] = sub i32 9, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP12:%.*]] = mul i32 1, [[TMP11]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 2 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[TMP14:%.*]] = icmp sge <vscale x 2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i32> [[TMP7]], <vscale x 2 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP16:%.*]] = shl <vscale x 2 x i32> [[PREDPHI]], splat (i32 8)
+; CHECK-NEXT: [[TMP17:%.*]] = trunc <vscale x 2 x i32> [[TMP16]] to <vscale x 2 x i8>
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> [[TMP17]], <vscale x 2 x ptr> align 1 [[BROADCAST_SPLAT4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[EXIT1:%.*]]
 ; CHECK: scalar.ph:

@@ -52,7 +72,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
 ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8
 ; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 8
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1]], label [[EXIT1]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1]], label [[EXIT1]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;

@@ -84,8 +104,9 @@ exit: ; preds = %for.body
   ret void
 }
 ;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[META2]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]], [[META1]]}
 ;.
