Skip to content

Commit 5a456c1

Browse files
authored
Revert "[VPlan] Simplify pow-of-2 (mul|udiv) -> (shl|lshr)" (#174559)
Reverts llvm/llvm-project#172477. This is causing failures for RVA23 (including some tests running away in their execution and causing OOM, hence the builder dying). I will attempt to follow up on the PR with a reproducer of some kind. See https://lab.llvm.org/buildbot/#/builders/210/builds/7243
1 parent 188d13d commit 5a456c1

File tree

130 files changed

+898
-850
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

130 files changed

+898
-850
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ struct Recipe_match {
291291
return false;
292292

293293
if (R->getNumOperands() != std::tuple_size<Ops_t>::value) {
294-
assert((Opcode == Instruction::PHI || isa<VPReplicateRecipe>(R)) &&
294+
assert(Opcode == Instruction::PHI &&
295295
"non-variadic recipe with matched opcode does not have the "
296296
"expected number of operands");
297297
return false;
@@ -564,12 +564,6 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
564564
return m_c_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
565565
}
566566

567-
template <typename Op0_t, typename Op1_t>
568-
inline AllRecipe_match<Instruction::UDiv, Op0_t, Op1_t>
569-
m_UDiv(const Op0_t &Op0, const Op1_t &Op1) {
570-
return m_Binary<Instruction::UDiv, Op0_t, Op1_t>(Op0, Op1);
571-
}
572-
573567
/// Match a binary AND operation.
574568
template <typename Op0_t, typename Op1_t>
575569
inline AllRecipe_commutative_match<Instruction::And, Op0_t, Op1_t>

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,21 +1342,6 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
13421342
return Def->replaceAllUsesWith(
13431343
Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
13441344

1345-
const APInt *APC;
1346-
if (match(Def, m_c_Mul(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2())
1347-
return Def->replaceAllUsesWith(Builder.createNaryOp(
1348-
Instruction::Shl,
1349-
{Def->getOperand(0),
1350-
Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1351-
*cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc()));
1352-
1353-
if (match(Def, m_UDiv(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2())
1354-
return Def->replaceAllUsesWith(Builder.createNaryOp(
1355-
Instruction::LShr,
1356-
{Def->getOperand(0),
1357-
Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1358-
{}, Def->getDebugLoc()));
1359-
13601345
if (match(Def, m_Not(m_VPValue(A)))) {
13611346
if (match(A, m_Not(m_VPValue(A))))
13621347
return Def->replaceAllUsesWith(A);

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
99
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
1010
; CHECK: vector.ph:
1111
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
12-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
12+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
1313
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
1414
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
1515
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -71,7 +71,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
7171
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
7272
; CHECK: vector.ph:
7373
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
74-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
74+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
7575
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
7676
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
7777
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -528,8 +528,8 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
528528
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
529529
; DEFAULT: [[VECTOR_PH]]:
530530
; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
531-
; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP4]], 2
532-
; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP11]], 2
531+
; DEFAULT-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], 4
532+
; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP11]], 4
533533
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]]
534534
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]]
535535
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[N_VEC]], 8
@@ -545,7 +545,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
545545
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP8]], i64 0
546546
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
547547
; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp <vscale x 4 x i16> [[BROADCAST_SPLAT]] to <vscale x 4 x double>
548-
; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP11]], 1
548+
; DEFAULT-NEXT: [[TMP14:%.*]] = mul nuw nsw i64 [[TMP11]], 2
549549
; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 3
550550
; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP11]]
551551
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP14]]
@@ -568,7 +568,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
568568
; PRED-NEXT: br label %[[VECTOR_PH:.*]]
569569
; PRED: [[VECTOR_PH]]:
570570
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
571-
; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
571+
; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
572572
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
573573
; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
574574
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]
@@ -1219,7 +1219,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
12191219
; DEFAULT-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
12201220
; DEFAULT: [[VECTOR_PH]]:
12211221
; DEFAULT-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
1222-
; DEFAULT-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2
1222+
; DEFAULT-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
12231223
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP9]]
12241224
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
12251225
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[Y]], i64 0
@@ -1273,7 +1273,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
12731273
; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
12741274
; PRED: [[VECTOR_PH]]:
12751275
; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1276-
; PRED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
1276+
; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
12771277
; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
12781278
; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
12791279
; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP0]], [[TMP8]]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
2121
; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
2222
; CHECK: [[VECTOR_PH]]:
2323
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
24-
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP8]], 1
25-
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP11]], 1
24+
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2
25+
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP11]], 2
2626
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
2727
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2828
; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]]
@@ -106,7 +106,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
106106
; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
107107
; CHECK: [[VECTOR_PH]]:
108108
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
109-
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
109+
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
110110
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
111111
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
112112
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]]
@@ -220,7 +220,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
220220
; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
221221
; CHECK: [[VECTOR_PH]]:
222222
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
223-
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
223+
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
224224
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
225225
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
226226
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]]

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define void @f1(ptr %A) #0 {
1212
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
1313
; CHECK: vector.ph:
1414
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
15-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
15+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
1616
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
1717
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
1818
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
7777
; SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
7878
; SVE: vector.ph:
7979
; SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
80-
; SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
80+
; SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
8181
; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
8282
; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
8383
; SVE-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
3030
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
3131
; DEFAULT: [[VECTOR_PH]]:
3232
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
33-
; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP9]], 3
34-
; DEFAULT-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP13]], 1
33+
; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8
34+
; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP13]], 2
3535
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]]
3636
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
3737
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0
@@ -70,7 +70,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
7070
; DEFAULT: [[VEC_EPILOG_PH]]:
7171
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
7272
; DEFAULT-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
73-
; DEFAULT-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 2
73+
; DEFAULT-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 4
7474
; DEFAULT-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], [[TMP34]]
7575
; DEFAULT-NEXT: [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]]
7676
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -130,7 +130,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
130130
; PRED-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
131131
; PRED: [[VECTOR_PH]]:
132132
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
133-
; PRED-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4
133+
; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
134134
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[X]], i64 0
135135
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
136136
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()

llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,8 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8
227227
; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
228228
; INTERLEAVE-4-VLA: vector.ph:
229229
; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
230-
; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP6]], 4
231-
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP10]], 2
230+
; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP6]], 16
231+
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP10]], 4
232232
; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
233233
; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
234234
; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
@@ -239,7 +239,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8
239239
; INTERLEAVE-4-VLA: vector.body:
240240
; INTERLEAVE-4-VLA-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
241241
; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
242-
; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP10]], 1
242+
; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = mul nuw nsw i64 [[TMP10]], 2
243243
; INTERLEAVE-4-VLA-NEXT: [[TMP16:%.*]] = mul nuw nsw i64 [[TMP10]], 3
244244
; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP10]]
245245
; INTERLEAVE-4-VLA-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP13]]

llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
144144
; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
145145
; INTERLEAVE-4-VLA: vector.ph:
146146
; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
147-
; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2
148-
; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 2
147+
; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
148+
; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4
149149
; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
150150
; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
151151
; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -156,7 +156,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
156156
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
157157
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
158158
; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
159-
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 1
159+
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 [[TMP5]], 2
160160
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw nsw i64 [[TMP5]], 3
161161
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]]
162162
; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]]

0 commit comments

Comments
 (0)