Skip to content

Commit 1c61b5f

Browse files
committed
[LV] Don't simplify wide binops to constants if non-uniform
After 6d6eea9 we started simplifying more operands of binops when they were known to be constant via SCEV.

However, in the example in #119173, we were simplifying a reduction phi that was constant in the original IR:

    <x1> vector loop: {
      vector.body:
        WIDEN-REDUCTION-PHI ir<%add45> = phi ir<5>, ir<%add> ; <5, 0, 0, 0>
        WIDEN ir<%add> = add ir<0>, ir<%add45> ; <5, 0, 0, 0>
      No successors
    }

    -->

    <x1> vector loop: {
      vector.body:
        WIDEN-REDUCTION-PHI ir<%add45> = phi ir<5>, ir<%add> ; <5, 0, 0, 0>
        WIDEN ir<%add> = add ir<0>, ir<5> ; <5, 5, 5, 5>
      No successors
    }

Whilst the underlying value is constant, the widened reduction PHI isn't uniform, so we can't simplify it.

This fixes #119173 by checking if the operand is known to be uniform, but also requires doing the same fix in the legacy cost model as well in order to avoid the cost-model mismatch assertion from #107015 again.
1 parent 43d9835 commit 1c61b5f

File tree

2 files changed

+25
-29
lines changed

2 files changed

+25
-29
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6716,9 +6716,16 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
67166716
// If we're speculating on the stride being 1, the multiplication may
67176717
// fold away. We can generalize this for all operations using the notion
67186718
// of neutral elements. (TODO)
6719+
auto IsAlwaysOne = [this, VF](Value *V) {
6720+
// Reduction phi SCEVs may be constant when scalar, but non-uniform when
6721+
// vectorized and unfoldable.
6722+
if (auto *I = dyn_cast<Instruction>(V);
6723+
I && !isUniformAfterVectorization(I, VF))
6724+
return false;
6725+
return PSE.getSCEV(V)->isOne();
6726+
};
67196727
if (I->getOpcode() == Instruction::Mul &&
6720-
(PSE.getSCEV(I->getOperand(0))->isOne() ||
6721-
PSE.getSCEV(I->getOperand(1))->isOne()))
6728+
(IsAlwaysOne(I->getOperand(0)) || IsAlwaysOne(I->getOperand(1))))
67226729
return 0;
67236730

67246731
// Detect reduction patterns
@@ -8632,6 +8639,8 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
86328639
// to replace operands with constants.
86338640
ScalarEvolution &SE = *PSE.getSE();
86348641
auto GetConstantViaSCEV = [this, &SE](VPValue *Op) {
8642+
if (!vputils::isUniformAfterVectorization(Op))
8643+
return Op;
86358644
Value *V = Op->getUnderlyingValue();
86368645
if (isa<Constant>(V) || !SE.isSCEVable(V->getType()))
86378646
return Op;

llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,34 +7,20 @@ target triple = "arm64-apple-macosx"
77
define i64 @mul_select_operand_known_1_via_scev() {
88
; CHECK-LABEL: define i64 @mul_select_operand_known_1_via_scev() {
99
; CHECK-NEXT: [[ENTRY:.*]]:
10-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
11-
; CHECK: [[VECTOR_PH]]:
12-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
13-
; CHECK: [[VECTOR_BODY]]:
14-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
15-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ <i64 12, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ]
16-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
17-
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
18-
; CHECK: [[MIDDLE_BLOCK]]:
19-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[VEC_PHI]])
20-
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
21-
; CHECK: [[SCALAR_PH]]:
22-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
23-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
2410
; CHECK-NEXT: br label %[[LOOP:.*]]
2511
; CHECK: [[LOOP]]:
26-
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
27-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
12+
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ 12, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
13+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
2814
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IV]], 1
2915
; CHECK-NEXT: [[CMP1_I:%.*]] = icmp eq i32 [[TMP1]], 0
3016
; CHECK-NEXT: [[NARROW_I:%.*]] = select i1 [[CMP1_I]], i32 1, i32 [[IV]]
3117
; CHECK-NEXT: [[MUL:%.*]] = zext nneg i32 [[NARROW_I]] to i64
3218
; CHECK-NEXT: [[RED_NEXT]] = mul nsw i64 [[RED]], [[MUL]]
3319
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
3420
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 1
35-
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
21+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
3622
; CHECK: [[EXIT]]:
37-
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP0]], %[[MIDDLE_BLOCK]] ]
23+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ]
3824
; CHECK-NEXT: ret i64 [[RES]]
3925
;
4026
entry:
@@ -65,27 +51,30 @@ define i32 @add_reduction_select_operand_constant_but_non_uniform() {
6551
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
6652
; CHECK: [[VECTOR_BODY]]:
6753
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
68-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 42, i32 0, i32 0, i32 0>, %[[VECTOR_PH]] ], [ splat (i32 42), %[[VECTOR_BODY]] ]
69-
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ splat (i32 42), %[[VECTOR_BODY]] ]
54+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 42, i32 0, i32 0, i32 0>, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
55+
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
56+
; CHECK-NEXT: [[TMP2]] = add <4 x i32> zeroinitializer, [[VEC_PHI]]
57+
; CHECK-NEXT: [[TMP1]] = add <4 x i32> zeroinitializer, [[VEC_PHI1]]
7058
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
7159
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 64
72-
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
60+
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
7361
; CHECK: [[MIDDLE_BLOCK]]:
74-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> splat (i32 84))
62+
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
63+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
7564
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
7665
; CHECK: [[SCALAR_PH]]:
7766
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 64, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
78-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 42, %[[ENTRY]] ]
67+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ 42, %[[ENTRY]] ]
7968
; CHECK-NEXT: br label %[[LOOP:.*]]
8069
; CHECK: [[LOOP]]:
8170
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD2_REASS:%.*]], %[[LOOP]] ]
8271
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
8372
; CHECK-NEXT: [[ADD2_REASS]] = add i32 [[IV]], 1
8473
; CHECK-NEXT: [[RDX_NEXT]] = add i32 0, [[RDX]]
8574
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD2_REASS]], 64
86-
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
75+
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
8776
; CHECK: [[EXIT]]:
88-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ]
77+
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ]
8978
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
9079
;
9180
entry:
@@ -109,6 +98,4 @@ exit:
10998
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
11099
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
111100
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
112-
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
113-
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
114101
;.

0 commit comments

Comments
 (0)