Skip to content

Commit 331842e

Browse files
committed
Address review comments
And simplify the test cases.
1 parent 7552cbb commit 331842e

File tree

2 files changed

+24
-41
lines changed

2 files changed

+24
-41
lines changed

llvm/lib/Transforms/Vectorize/EVLIndVarSimplify.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,16 +83,17 @@ static std::optional<uint32_t> getVFFromIndVar(const SCEV *Step,
8383

8484
// If not, see if the vscale_range of the parent function is a fixed value,
8585
// in which case the step value is replaced by a constant.
86-
if (isa<SCEVConstant>(Step) && F.hasFnAttribute(Attribute::VScaleRange)) {
87-
APInt V = cast<SCEVConstant>(Step)->getAPInt().abs();
88-
ConstantRange CR = llvm::getVScaleRange(&F, 64);
89-
if (const APInt *Fixed = CR.getSingleElement()) {
90-
V = V.zextOrTrunc(Fixed->getBitWidth());
91-
uint64_t VF = V.udiv(*Fixed).getLimitedValue();
92-
if (VF && llvm::isUInt<32>(VF))
93-
return static_cast<uint32_t>(VF);
86+
if (F.hasFnAttribute(Attribute::VScaleRange))
87+
if (auto *ConstStep = dyn_cast<SCEVConstant>(Step)) {
88+
APInt V = ConstStep->getAPInt().abs();
89+
ConstantRange CR = llvm::getVScaleRange(&F, 64);
90+
if (const APInt *Fixed = CR.getSingleElement()) {
91+
V = V.zextOrTrunc(Fixed->getBitWidth());
92+
uint64_t VF = V.udiv(*Fixed).getLimitedValue();
93+
if (VF && llvm::isUInt<32>(VF))
94+
return static_cast<uint32_t>(VF);
95+
}
9496
}
95-
}
9697

9798
return std::nullopt;
9899
}

llvm/test/Transforms/LoopVectorize/RISCV/evl-iv-simplify.ll

Lines changed: 14 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify)' < %s | FileCheck %s
33
; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify),function(simplifycfg,dce)' < %s | FileCheck %s --check-prefix=LOOP-DEL
44

5-
define void @simple(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
5+
define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %N) {
66
; CHECK-LABEL: define void @simple(
7-
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
88
; CHECK-NEXT: entry:
99
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
1010
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
@@ -29,12 +29,9 @@ define void @simple(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
2929
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
3030
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
3131
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
32-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
33-
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
34-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP13]]
35-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
32+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
3633
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
37-
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[VP_OP_LOAD1]], [[VP_OP_LOAD]]
34+
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[C]], [[VP_OP_LOAD1]]
3835
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
3936
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
4037
; CHECK-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
@@ -52,10 +49,7 @@ define void @simple(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
5249
; CHECK: for.body:
5350
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
5451
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
55-
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
56-
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
57-
; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
58-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP24]]
52+
; CHECK-NEXT: [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
5953
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
6054
; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
6155
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -67,7 +61,7 @@ define void @simple(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
6761
; CHECK-NEXT: ret void
6862
;
6963
; LOOP-DEL-LABEL: define void @simple(
70-
; LOOP-DEL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
64+
; LOOP-DEL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
7165
; LOOP-DEL-NEXT: entry:
7266
; LOOP-DEL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
7367
; LOOP-DEL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
@@ -82,12 +76,9 @@ define void @simple(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
8276
; LOOP-DEL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP4]], i32 4, i1 true)
8377
; LOOP-DEL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
8478
; LOOP-DEL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]]
85-
; LOOP-DEL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
86-
; LOOP-DEL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP8]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
87-
; LOOP-DEL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP6]]
88-
; LOOP-DEL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
79+
; LOOP-DEL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
8980
; LOOP-DEL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
90-
; LOOP-DEL-NEXT: [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[VP_OP_LOAD1]], [[VP_OP_LOAD]]
81+
; LOOP-DEL-NEXT: [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[C]], [[VP_OP_LOAD1]]
9182
; LOOP-DEL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
9283
; LOOP-DEL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
9384
; LOOP-DEL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP11]], ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
@@ -98,10 +89,7 @@ define void @simple(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
9889
; LOOP-DEL: for.body:
9990
; LOOP-DEL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
10091
; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
101-
; LOOP-DEL-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
102-
; LOOP-DEL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
103-
; LOOP-DEL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
104-
; LOOP-DEL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
92+
; LOOP-DEL-NEXT: [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10593
; LOOP-DEL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
10694
; LOOP-DEL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
10795
; LOOP-DEL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -138,14 +126,11 @@ vector.body: ; preds = %vector.body, %vecto
138126
%13 = add i64 %evl.based.iv, 0
139127
%14 = getelementptr inbounds i32, ptr %b, i64 %13
140128
%15 = getelementptr inbounds i32, ptr %14, i32 0
141-
%vp.op.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %15, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 %12)
142-
%16 = getelementptr inbounds i32, ptr %c, i64 %13
143-
%17 = getelementptr inbounds i32, ptr %16, i32 0
144-
%vp.op.load1 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %17, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 %12)
145-
%18 = add nsw <vscale x 4 x i32> %vp.op.load1, %vp.op.load
129+
%vp.op.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %15, <vscale x 4 x i1> splat (i1 true), i32 %12)
130+
%18 = add nsw <vscale x 4 x i32> %c, %vp.op.load
146131
%19 = getelementptr inbounds i32, ptr %a, i64 %13
147132
%20 = getelementptr inbounds i32, ptr %19, i32 0
148-
call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %18, ptr align 4 %20, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 %12)
133+
call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %18, ptr align 4 %20, <vscale x 4 x i1> splat (i1 true), i32 %12)
149134
%21 = zext i32 %12 to i64
150135
%index.evl.next = add i64 %21, %evl.based.iv
151136
%index.next = add i64 %index, %10
@@ -163,11 +148,8 @@ for.body: ; preds = %for.body, %scalar.p
163148
%iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %for.body ]
164149
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
165150
%23 = load i32, ptr %arrayidx, align 4
166-
%arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv
167-
%24 = load i32, ptr %arrayidx2, align 4
168-
%add = add nsw i32 %24, %23
169151
%arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
170-
store i32 %add, ptr %arrayidx4, align 4
152+
store i32 %23, ptr %arrayidx4, align 4
171153
%iv.next = add nuw nsw i64 %iv, 1
172154
%exitcond.not = icmp eq i64 %iv.next, %N
173155
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !3
@@ -241,7 +223,7 @@ vector.body:
241223
%41 = sub i64 %N, %evl.based.iv
242224
%42 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %41, i32 2, i1 true)
243225
%gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv
244-
tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 %42)
226+
tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> splat (i1 true), i32 %42)
245227
%43 = zext i32 %42 to i64
246228
%index.evl.next = add i64 %evl.based.iv, %43
247229
%lsr.iv.next33 = add i64 %lsr.iv32, -16

0 commit comments

Comments
 (0)