Skip to content

Commit 25e97fc

Browse files
committed
Revert "[RISCV][LoopVectorize] Use DataWithEVL as the preferred tail folding style (llvm#148686)"
This reverts commit 38318dd. The clang-riscv-gauntlet buildbot is breaking with this commit: https://lab.llvm.org/buildbot/#/builders/210/builds/371
1 parent a969bc1 commit 25e97fc

File tree

6 files changed

+442
-209
lines changed

6 files changed

+442
-209
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
116116
}
117117
TailFoldingStyle
118118
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
119-
return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
120-
: TailFoldingStyle::None;
119+
return ST->hasVInstructions() ? TailFoldingStyle::Data
120+
: TailFoldingStyle::DataWithoutLaneMask;
121121
}
122122
std::optional<unsigned> getMaxVScale() const override;
123123
std::optional<unsigned> getVScaleForTuning() const override;

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,15 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
133133
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
134134
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
135135
; CHECK: vector.body:
136-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 8, i32 4, i1 true)
136+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
137137
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8:%.*]], i32 0
138-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
138+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
139139
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
140140
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11:%.*]], i32 0
141-
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
141+
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
142142
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
143143
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
144-
; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr align 1 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
144+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
145145
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
146146
; CHECK: middle.block:
147147
; CHECK-NEXT: br label [[FOR_END:%.*]]

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 24 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7,49 +7,29 @@ define void @test(ptr %p, i64 %a, i8 %b) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH1:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
10-
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
11-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 2
12-
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
13-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 9, [[TMP2]]
14-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
15-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
16-
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
17-
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2
18-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[B]], i64 0
19-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
20-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[A]], i64 0
21-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
22-
; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
23-
; CHECK-NEXT: [[TMP6:%.*]] = ashr <vscale x 2 x i64> [[TMP5]], splat (i64 52)
24-
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i64> [[TMP6]] to <vscale x 2 x i32>
25-
; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 2 x i8> [[BROADCAST_SPLAT]] to <vscale x 2 x i32>
26-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P]], i64 0
27-
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
28-
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
29-
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1)
30-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP10]]
10+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B]], i64 0
11+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
12+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[A]], i64 0
13+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
14+
; CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
15+
; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i64> [[TMP0]], splat (i64 52)
16+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i32>
17+
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
3118
; CHECK-NEXT: br label [[FOR_COND:%.*]]
3219
; CHECK: vector.body:
3320
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ]
34-
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_COND]] ]
35-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
36-
; CHECK-NEXT: [[AVL:%.*]] = sub i32 9, [[EVL_BASED_IV]]
37-
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 2, i1 true)
38-
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 1, [[TMP11]]
39-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
40-
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
41-
; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 2 x i32> [[VEC_IND]], splat (i32 8)
42-
; CHECK-NEXT: [[TMP14:%.*]] = icmp sge <vscale x 2 x i32> [[VEC_IND]], splat (i32 2)
43-
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
44-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i32> [[TMP7]], <vscale x 2 x i32> [[TMP8]]
45-
; CHECK-NEXT: [[TMP16:%.*]] = shl <vscale x 2 x i32> [[PREDPHI]], splat (i32 8)
46-
; CHECK-NEXT: [[TMP17:%.*]] = trunc <vscale x 2 x i32> [[TMP16]] to <vscale x 2 x i8>
47-
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> [[TMP17]], <vscale x 2 x ptr> align 1 [[BROADCAST_SPLAT4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
48-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]]
49-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
50-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT6]]
51-
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
52-
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
21+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
22+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
23+
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
24+
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
25+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
26+
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
27+
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
28+
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
29+
; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1
30+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
31+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
32+
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
5333
; CHECK: middle.block:
5434
; CHECK-NEXT: br label [[EXIT1:%.*]]
5535
; CHECK: scalar.ph:
@@ -72,7 +52,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
7252
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8
7353
; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1
7454
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 8
75-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1]], label [[EXIT1]], !llvm.loop [[LOOP4:![0-9]+]]
55+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1]], label [[EXIT1]], !llvm.loop [[LOOP3:![0-9]+]]
7656
; CHECK: exit:
7757
; CHECK-NEXT: ret void
7858
;
@@ -104,9 +84,8 @@ exit: ; preds = %for.body
10484
ret void
10585
}
10686
;.
107-
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
87+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
10888
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
109-
; CHECK: [[META2]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
110-
; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
111-
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]], [[META1]]}
89+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
90+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
11291
;.

0 commit comments

Comments
 (0)