Skip to content

Commit 72a794a

Browse files
committed
[LV] Fix runtime-VF logic when generating RT-checks
GeneratedRTChecks::create has a bug when calling addDiffRuntimeChecks: the GetVF lambda caches RuntimeVF on its first invocation and returns that stale value on every subsequent invocation, even when called with a different bit width, which results in a smaller runtime VF being used in some cases. Fix the bug by recomputing the runtime VF on each call, stripping a FIXME in a test.
1 parent 80bdfcd commit 72a794a

File tree

3 files changed

+48
-37
lines changed

3 files changed

+48
-37
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1924,21 +1924,17 @@ class GeneratedRTChecks {
19241924
"vector.memcheck");
19251925

19261926
auto DiffChecks = RtPtrChecking.getDiffChecks();
1927-
if (DiffChecks) {
1928-
Value *RuntimeVF = nullptr;
1929-
MemRuntimeCheckCond = addDiffRuntimeChecks(
1930-
MemCheckBlock->getTerminator(), *DiffChecks, MemCheckExp,
1931-
[VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) {
1932-
if (!RuntimeVF)
1933-
RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF);
1934-
return RuntimeVF;
1935-
},
1936-
IC);
1937-
} else {
1938-
MemRuntimeCheckCond = addRuntimeChecks(
1939-
MemCheckBlock->getTerminator(), L, RtPtrChecking.getChecks(),
1940-
MemCheckExp, VectorizerParams::HoistRuntimeChecks);
1941-
}
1927+
MemRuntimeCheckCond =
1928+
DiffChecks
1929+
? addDiffRuntimeChecks(
1930+
MemCheckBlock->getTerminator(), *DiffChecks, MemCheckExp,
1931+
[VF](IRBuilderBase &B, unsigned Bits) {
1932+
return getRuntimeVF(B, B.getIntNTy(Bits), VF);
1933+
},
1934+
IC)
1935+
: addRuntimeChecks(MemCheckBlock->getTerminator(), L,
1936+
RtPtrChecking.getChecks(), MemCheckExp,
1937+
VectorizerParams::HoistRuntimeChecks);
19421938
assert(MemRuntimeCheckCond &&
19431939
"no RT checks generated although RtPtrChecking "
19441940
"claimed checks are required");

llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ target triple = "aarch64-unknown-linux-gnu"
55

66
; Test case where the minimum profitable trip count due to runtime checks
77
; exceeds VF.getKnownMinValue() * UF.
8-
; FIXME: The code currently incorrectly is missing a umax(VF * UF, 28).
98
define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr %src.1, ptr %src.2, i64 %n) {
109
; CHECK-LABEL: @min_trip_count_due_to_runtime_checks_1(
1110
; CHECK-NEXT: entry:
@@ -16,7 +15,7 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
1615
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
1716
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1817
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
19-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
18+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 28, i64 [[TMP1]])
2019
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP2]]
2120
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
2221
; CHECK: vector.memcheck:
@@ -25,21 +24,29 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
2524
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
2625
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[DST_21]], [[DST_12]]
2726
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
28-
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 16
27+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
28+
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP7]], 2
29+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP18]], 16
2930
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[DST_12]], [[SRC_13]]
30-
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
31+
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP9]]
3132
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
32-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP4]], 16
33+
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
34+
; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP11]], 2
35+
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP22]], 16
3336
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[DST_12]], [[SRC_25]]
34-
; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP10]], [[TMP9]]
37+
; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP10]], [[TMP13]]
3538
; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK6]]
36-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 16
39+
; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
40+
; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP24]], 2
41+
; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP26]], 16
3742
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[DST_21]], [[SRC_13]]
38-
; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP12]], [[TMP11]]
43+
; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP12]], [[TMP38]]
3944
; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]]
40-
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP4]], 16
45+
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
46+
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2
47+
; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16
4148
; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[DST_21]], [[SRC_25]]
42-
; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
49+
; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP14]], [[TMP21]]
4350
; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[DIFF_CHECK10]]
4451
; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4552
; CHECK: vector.ph:

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
1919
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
2020
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
2121
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
22-
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
22+
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP2]])
2323
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
2424
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
2525
; IF-EVL: [[VECTOR_MEMCHECK]]:
@@ -28,9 +28,11 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
2828
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
2929
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
3030
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
31-
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
31+
; IF-EVL-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
32+
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP15]], 4
33+
; IF-EVL-NEXT: [[TMP30:%.*]] = mul i64 [[TMP25]], 4
3234
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
33-
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
35+
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP30]]
3436
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
3537
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
3638
; IF-EVL: [[VECTOR_PH]]:
@@ -134,7 +136,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
134136
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
135137
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
136138
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
137-
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
139+
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP2]])
138140
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
139141
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
140142
; IF-EVL: [[VECTOR_MEMCHECK]]:
@@ -143,9 +145,11 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
143145
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
144146
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
145147
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
146-
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
148+
; IF-EVL-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
149+
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP15]], 4
150+
; IF-EVL-NEXT: [[TMP30:%.*]] = mul i64 [[TMP25]], 4
147151
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
148-
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
152+
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP30]]
149153
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
150154
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
151155
; IF-EVL: [[VECTOR_PH]]:
@@ -249,7 +253,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
249253
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
250254
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
251255
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
252-
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
256+
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP2]])
253257
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
254258
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
255259
; IF-EVL: [[VECTOR_MEMCHECK]]:
@@ -258,9 +262,11 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
258262
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
259263
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
260264
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
261-
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
265+
; IF-EVL-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
266+
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP15]], 4
267+
; IF-EVL-NEXT: [[TMP30:%.*]] = mul i64 [[TMP25]], 4
262268
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
263-
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
269+
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP30]]
264270
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
265271
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
266272
; IF-EVL: [[VECTOR_PH]]:
@@ -364,7 +370,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
364370
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
365371
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
366372
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
367-
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
373+
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP2]])
368374
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
369375
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
370376
; IF-EVL: [[VECTOR_MEMCHECK]]:
@@ -373,9 +379,11 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
373379
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
374380
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
375381
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
376-
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
382+
; IF-EVL-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
383+
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP15]], 4
384+
; IF-EVL-NEXT: [[TMP30:%.*]] = mul i64 [[TMP25]], 4
377385
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
378-
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
386+
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP30]]
379387
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
380388
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
381389
; IF-EVL: [[VECTOR_PH]]:

0 commit comments

Comments (0)