Skip to content

Commit 98388a4

Browse files
committed
[LV] Address review: really fix the bug
1 parent 8ea8b88 commit 98388a4

File tree

3 files changed: 37 additions and 5 deletions

3 files changed

+37
-5
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,11 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
827827
if (isa<PHINode>(I))
828828
continue;
829829

830+
// Do not change the operand types of a call, as doing that would cause a
831+
// signature mismatch.
832+
if (isa<CallBase>(I))
833+
continue;
834+
830835
if (DBits[Leader] == ~0ULL)
831836
// All bits demanded, no point continuing.
832837
continue;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1689,7 +1689,7 @@ void VPlanTransforms::truncateToMinimalBitwidths(
16891689
}
16901690

16911691
assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
1692-
if (isa<VPWidenLoadRecipe>(&R))
1692+
if (isa<VPWidenLoadRecipe, VPWidenIntrinsicRecipe>(&R))
16931693
continue;
16941694

16951695
// Shrink operands by introducing truncates as needed.

llvm/test/Transforms/LoopVectorize/pr87407.ll

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,41 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
22
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
33

44
define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
55
; CHECK-LABEL: define i8 @pr87407(
66
; CHECK-SAME: i8 [[X:%.*]], i64 [[Y:%.*]], i64 [[N:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*]]:
88
; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X]] to i64
9+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
10+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
11+
; CHECK: [[VECTOR_PH]]:
12+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
13+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
14+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[Y]], i64 0
15+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
16+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[ZEXT_X]], i64 0
17+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
18+
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[BROADCAST_SPLAT2]], <4 x i64> [[BROADCAST_SPLAT]])
19+
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i1>
20+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i1> [[TMP1]], zeroinitializer
21+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i32>
22+
; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 8)
23+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
24+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
25+
; CHECK: [[VECTOR_BODY]]:
26+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
27+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
28+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
29+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
30+
; CHECK: [[MIDDLE_BLOCK]]:
31+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i8> [[TMP5]], i32 3
32+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
33+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
34+
; CHECK: [[SCALAR_PH]]:
35+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
936
; CHECK-NEXT: br label %[[LOOP:.*]]
1037
; CHECK: [[LOOP]]:
11-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
38+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
1239
; CHECK-NEXT: [[MAX:%.*]] = tail call i64 @llvm.umax.i64(i64 [[ZEXT_X]], i64 [[Y]])
1340
; CHECK-NEXT: [[CMP_MAX_0:%.*]] = icmp ne i64 [[MAX]], 0
1441
; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP_MAX_0]] to i64
@@ -17,9 +44,9 @@ define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
1744
; CHECK-NEXT: [[RES:%.*]] = trunc i32 [[SHL]] to i8
1845
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
1946
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
20-
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
47+
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
2148
; CHECK: [[EXIT]]:
22-
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], %[[LOOP]] ]
49+
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
2350
; CHECK-NEXT: ret i8 [[RES_LCSSA]]
2451
;
2552
entry:

0 commit comments

Comments
 (0)