Skip to content

Commit 9454011

Browse files
authored
Merge branch 'main' into fix/161070
2 parents cb5b183 + d297987 commit 9454011

File tree

6 files changed

+225
-42
lines changed

6 files changed

+225
-42
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@
6464
#include "llvm/Support/KnownBits.h"
6565
#include "llvm/Support/KnownFPClass.h"
6666
#include "llvm/Support/MathExtras.h"
67+
#include "llvm/Support/TypeSize.h"
6768
#include "llvm/Support/raw_ostream.h"
6869
#include "llvm/Transforms/InstCombine/InstCombiner.h"
6970
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
@@ -3781,6 +3782,17 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
37813782
return replaceInstUsesWith(CI, Res);
37823783
}
37833784
}
3785+
3786+
// vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
3787+
if (Value *Splat = getSplatValue(Arg)) {
3788+
ElementCount VecToReduceCount =
3789+
cast<VectorType>(Arg->getType())->getElementCount();
3790+
if (VecToReduceCount.isFixed()) {
3791+
unsigned VectorSize = VecToReduceCount.getFixedValue();
3792+
return BinaryOperator::CreateMul(
3793+
Splat, ConstantInt::get(Splat->getType(), VectorSize));
3794+
}
3795+
}
37843796
}
37853797
[[fallthrough]];
37863798
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2853,6 +2853,7 @@ void VPlanTransforms::replaceSymbolicStrides(
28532853
return R->getParent()->getParent() ||
28542854
R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor();
28552855
};
2856+
ValueToSCEVMapTy RewriteMap;
28562857
for (const SCEV *Stride : StridesMap.values()) {
28572858
using namespace SCEVPatternMatch;
28582859
auto *StrideV = cast<SCEVUnknown>(Stride)->getValue();
@@ -2880,6 +2881,22 @@ void VPlanTransforms::replaceSymbolicStrides(
28802881
VPValue *CI = Plan.getOrAddLiveIn(ConstantInt::get(U->getType(), C));
28812882
StrideVPV->replaceUsesWithIf(CI, CanUseVersionedStride);
28822883
}
2884+
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
2885+
}
2886+
2887+
for (VPRecipeBase &R : *Plan.getEntry()) {
2888+
auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
2889+
if (!ExpSCEV)
2890+
continue;
2891+
const SCEV *ScevExpr = ExpSCEV->getSCEV();
2892+
auto *NewSCEV =
2893+
SCEVParameterRewriter::rewrite(ScevExpr, *PSE.getSE(), RewriteMap);
2894+
if (NewSCEV != ScevExpr) {
2895+
VPValue *NewExp = vputils::getOrCreateVPValueForSCEVExpr(Plan, NewSCEV);
2896+
ExpSCEV->replaceAllUsesWith(NewExp);
2897+
if (Plan.getTripCount() == ExpSCEV)
2898+
Plan.resetTripCount(NewExp);
2899+
}
28832900
}
28842901
}
28852902

llvm/test/Transforms/InstCombine/vector-reductions.ll

Lines changed: 171 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -308,3 +308,174 @@ define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
308308
%r = sub i32 %r0, %r1
309309
ret i32 %r
310310
}
311+
312+
; Sums a fixed <4 x i32> splat of %0 with vector.reduce.add. The CHECK lines
; expect the reduction to fold to %0 * 4, emitted as a left shift by 2.
define i32 @constant_multiplied_4xi32(i32 %0) {
313+
; CHECK-LABEL: @constant_multiplied_4xi32(
314+
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2
315+
; CHECK-NEXT: ret i32 [[TMP2]]
316+
;
317+
%2 = insertelement <4 x i32> poison, i32 %0, i64 0
318+
%3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer
319+
%4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3)
320+
ret i32 %4
321+
}
322+
323+
; Same splat-and-reduce pattern with a non-power-of-two lane count (3). The
; CHECK lines expect a plain `mul` by 3 instead of a shift.
define i32 @constant_multiplied_3xi32(i32 %0) {
324+
; CHECK-LABEL: @constant_multiplied_3xi32(
325+
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 3
326+
; CHECK-NEXT: ret i32 [[TMP2]]
327+
;
328+
%2 = insertelement <3 x i32> poison, i32 %0, i64 0
329+
%3 = shufflevector <3 x i32> %2, <3 x i32> poison, <3 x i32> zeroinitializer
330+
%4 = tail call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %3)
331+
ret i32 %4
332+
}
333+
334+
; Splat-and-reduce over <4 x i64>: covers a 64-bit element type. The CHECK
; lines expect %0 * 4 in i64, emitted as a left shift by 2.
define i64 @constant_multiplied_4xi64(i64 %0) {
335+
; CHECK-LABEL: @constant_multiplied_4xi64(
336+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 2
337+
; CHECK-NEXT: ret i64 [[TMP2]]
338+
;
339+
%2 = insertelement <4 x i64> poison, i64 %0, i64 0
340+
%3 = shufflevector <4 x i64> %2, <4 x i64> poison, <4 x i32> zeroinitializer
341+
%4 = tail call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %3)
342+
ret i64 %4
343+
}
344+
345+
; The splat is produced by a widening shuffle: a <4 x i32> source is broadcast
; into an <8 x i32> result via an all-zero mask. The multiplier comes from the
; 8-lane reduced vector, so the CHECK lines expect a left shift by 3.
define i32 @constant_multiplied_8xi32(i32 %0) {
346+
; CHECK-LABEL: @constant_multiplied_8xi32(
347+
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 3
348+
; CHECK-NEXT: ret i32 [[TMP2]]
349+
;
350+
%2 = insertelement <4 x i32> poison, i32 %0, i64 0
351+
%3 = shufflevector <4 x i32> %2, <4 x i32> poison, <8 x i32> zeroinitializer
352+
%4 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %3)
353+
ret i32 %4
354+
}
355+
356+
357+
; Widening-shuffle splat from a <4 x i32> source into <16 x i32>. The CHECK
; lines expect %0 * 16, emitted as a left shift by 4.
define i32 @constant_multiplied_16xi32(i32 %0) {
358+
; CHECK-LABEL: @constant_multiplied_16xi32(
359+
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 4
360+
; CHECK-NEXT: ret i32 [[TMP2]]
361+
;
362+
%2 = insertelement <4 x i32> poison, i32 %0, i64 0
363+
%3 = shufflevector <4 x i32> %2, <4 x i32> poison, <16 x i32> zeroinitializer
364+
%4 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3)
365+
ret i32 %4
366+
}
367+
368+
369+
; The scalar is inserted at lane 1 and the shuffle mask selects lane 1 for
; every result element, so the vector is still a splat of %0. The CHECK lines
; expect the same fold as the lane-0 case: a left shift by 2.
define i32 @constant_multiplied_4xi32_at_idx1(i32 %0) {
370+
; CHECK-LABEL: @constant_multiplied_4xi32_at_idx1(
371+
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2
372+
; CHECK-NEXT: ret i32 [[TMP2]]
373+
;
374+
%2 = insertelement <4 x i32> poison, i32 %0, i64 1
375+
%3 = shufflevector <4 x i32> %2, <4 x i32> poison,
376+
<4 x i32> <i32 1, i32 1, i32 1, i32 1>
377+
%4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3)
378+
ret i32 %4
379+
}
380+
381+
; Negative test: %0 is inserted at lane 1, but the all-zero shuffle mask
; broadcasts lane 0, which is still poison. %0 never reaches the reduction, so
; the CHECK lines expect `ret i32 poison` rather than a multiply of %0.
define i32 @negative_constant_multiplied_4xi32(i32 %0) {
382+
; CHECK-LABEL: @negative_constant_multiplied_4xi32(
383+
; CHECK-NEXT: ret i32 poison
384+
;
385+
%2 = insertelement <4 x i32> poison, i32 %0, i64 1
386+
%3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer
387+
%4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3)
388+
ret i32 %4
389+
}
390+
391+
; Widening-shuffle splat from a <4 x i32> source into a 6-lane vector. Six is
; not a power of two, so the CHECK lines expect `mul` by 6 instead of a shift.
define i32 @constant_multiplied_6xi32(i32 %0) {
392+
; CHECK-LABEL: @constant_multiplied_6xi32(
393+
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 6
394+
; CHECK-NEXT: ret i32 [[TMP2]]
395+
;
396+
%2 = insertelement <4 x i32> poison, i32 %0, i64 0
397+
%3 = shufflevector <4 x i32> %2, <4 x i32> poison, <6 x i32> zeroinitializer
398+
%4 = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> %3)
399+
ret i32 %4
400+
}
401+
402+
; i64 variant of the 6-lane case: the CHECK lines expect `mul i64` by 6.
define i64 @constant_multiplied_6xi64(i64 %0) {
403+
; CHECK-LABEL: @constant_multiplied_6xi64(
404+
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0:%.*]], 6
405+
; CHECK-NEXT: ret i64 [[TMP2]]
406+
;
407+
%2 = insertelement <4 x i64> poison, i64 %0, i64 0
408+
%3 = shufflevector <4 x i64> %2, <4 x i64> poison, <6 x i32> zeroinitializer
409+
%4 = tail call i64 @llvm.vector.reduce.add.v6i64(<6 x i64> %3)
410+
ret i64 %4
411+
}
412+
413+
; i1 element type: the expected output is not a multiply. The CHECK lines keep
; the splat and compute the sum as bitcast-to-i8 + ctpop + trunc.
; NOTE(review): presumably an i1-specific reduce.add rewrite takes precedence
; over the splat fold -- confirm against visitCallInst.
define i1 @constant_multiplied_8xi1(i1 %0) {
414+
; CHECK-LABEL: @constant_multiplied_8xi1(
415+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[TMP0:%.*]], i64 0
416+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer
417+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
418+
; CHECK-NEXT: [[TMP5:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[TMP4]])
419+
; CHECK-NEXT: [[TMP6:%.*]] = trunc i8 [[TMP5]] to i1
420+
; CHECK-NEXT: ret i1 [[TMP6]]
421+
;
422+
%2 = insertelement <8 x i1> poison, i1 %0, i32 0
423+
%3 = shufflevector <8 x i1> %2, <8 x i1> poison, <8 x i32> zeroinitializer
424+
%4 = tail call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> %3)
425+
ret i1 %4
426+
}
427+
428+
; Narrow element type where the lane count does not fit: i2 arithmetic is
; modulo 4, so %0 * 4 is always 0. The CHECK lines expect `ret i2 0`.
define i2 @constant_multiplied_4xi2(i2 %0) {
429+
; CHECK-LABEL: @constant_multiplied_4xi2(
430+
; CHECK-NEXT: ret i2 0
431+
;
432+
%2 = insertelement <4 x i2> poison, i2 %0, i32 0
433+
%3 = shufflevector <4 x i2> %2, <4 x i2> poison, <4 x i32> zeroinitializer
434+
%4 = tail call i2 @llvm.vector.reduce.add.v4i2(<4 x i2> %3)
435+
ret i2 %4
436+
}
437+
438+
; Five lanes of i2: 5 is congruent to 1 modulo 4, so %0 * 5 wraps back to %0.
; The CHECK lines expect the argument to be returned unchanged.
define i2 @constant_multiplied_5xi2(i2 %0) {
439+
; CHECK-LABEL: @constant_multiplied_5xi2(
440+
; CHECK-NEXT: ret i2 [[TMP0:%.*]]
441+
;
442+
%2 = insertelement <5 x i2> poison, i2 %0, i64 0
443+
%3 = shufflevector <5 x i2> %2, <5 x i2> poison, <5 x i32> zeroinitializer
444+
%4 = tail call i2 @llvm.vector.reduce.add.v5i2(<5 x i2> %3)
445+
ret i2 %4
446+
}
447+
448+
; Six lanes of i2: 6 is congruent to 2 modulo 4, so %0 * 6 equals %0 * 2. The
; CHECK lines expect a left shift by 1.
define i2 @constant_multiplied_6xi2(i2 %0) {
449+
; CHECK-LABEL: @constant_multiplied_6xi2(
450+
; CHECK-NEXT: [[TMP2:%.*]] = shl i2 [[TMP0:%.*]], 1
451+
; CHECK-NEXT: ret i2 [[TMP2]]
452+
;
453+
%2 = insertelement <6 x i2> poison, i2 %0, i64 0
454+
%3 = shufflevector <6 x i2> %2, <6 x i2> poison, <6 x i32> zeroinitializer
455+
%4 = tail call i2 @llvm.vector.reduce.add.v6i2(<6 x i2> %3)
456+
ret i2 %4
457+
}
458+
459+
; Seven lanes of i2: 7 is congruent to -1 modulo 4, so %0 * 7 equals -%0. The
; CHECK lines expect a negation, written as `sub i2 0, %0`.
define i2 @constant_multiplied_7xi2(i2 %0) {
460+
; CHECK-LABEL: @constant_multiplied_7xi2(
461+
; CHECK-NEXT: [[TMP2:%.*]] = sub i2 0, [[TMP0:%.*]]
462+
; CHECK-NEXT: ret i2 [[TMP2]]
463+
;
464+
%2 = insertelement <7 x i2> poison, i2 %0, i64 0
465+
%3 = shufflevector <7 x i2> %2, <7 x i2> poison, <7 x i32> zeroinitializer
466+
%4 = tail call i2 @llvm.vector.reduce.add.v7i2(<7 x i2> %3)
467+
ret i2 %4
468+
}
469+
470+
; Negative test: the splat is a scalable vector (<vscale x 4 x i32>), so the
; lane count is not a compile-time constant. The CHECK lines expect the
; insertelement, shufflevector and reduce.add call to be left as written.
define i32 @negative_scalable_vector(i32 %0) {
471+
; CHECK-LABEL: @negative_scalable_vector(
472+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0:%.*]], i64 0
473+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
474+
; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP3]])
475+
; CHECK-NEXT: ret i32 [[TMP4]]
476+
;
477+
%2 = insertelement <vscale x 4 x i32> poison, i32 %0, i64 0
478+
%3 = shufflevector <vscale x 4 x i32> %2, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
479+
%4 = tail call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %3)
480+
ret i32 %4
481+
}

llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -220,14 +220,18 @@ define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprog
220220
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[STEP]], i32 1)
221221
; CHECK-NEXT: [[TMP8:%.*]] = udiv i32 [[TMP7]], [[UMAX]]
222222
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP6]], [[TMP8]]
223-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 2
223+
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDVAR_LCSSA1]], 2
224+
; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP12]], i32 0)
225+
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP3]], -1
226+
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[SMAX1]], [[TMP14]]
227+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP15]], 2
224228
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
225229
; CHECK: [[VECTOR_SCEVCHECK]]:
226230
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], 1
227231
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
228232
; CHECK: [[VECTOR_PH]]:
229-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP9]], 2
230-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP9]], [[N_MOD_VF]]
233+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP15]], 2
234+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP15]], [[N_MOD_VF]]
231235
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[IV_1_LCSSA]], [[N_VEC]]
232236
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
233237
; CHECK: [[VECTOR_BODY]]:
@@ -239,7 +243,7 @@ define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprog
239243
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
240244
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
241245
; CHECK: [[MIDDLE_BLOCK]]:
242-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP9]], [[N_VEC]]
246+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP15]], [[N_VEC]]
243247
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
244248
; CHECK: [[SCALAR_PH]]:
245249
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ [[IV_1_LCSSA]], %[[LOOP_2_PREHEADER]] ], [ [[IV_1_LCSSA]], %[[VECTOR_SCEVCHECK]] ]

llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll

Lines changed: 11 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -22,13 +22,11 @@ define void @test_versioned_with_sext_use(i32 %offset, ptr %dst) {
2222
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[OFFSET]], 1
2323
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2424
; CHECK: vector.ph:
25-
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 200, [[OFFSET_EXT]]
26-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], [[TMP0]]
25+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], 200
2726
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2827
; CHECK: vector.body:
2928
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
30-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], [[OFFSET_EXT]]
31-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[TMP1]]
29+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]]
3230
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
3331
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 8
3432
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -94,13 +92,11 @@ define void @test_versioned_with_zext_use(i32 %offset, ptr %dst) {
9492
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[OFFSET]], 1
9593
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
9694
; CHECK: vector.ph:
97-
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 200, [[OFFSET_EXT]]
98-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], [[TMP0]]
95+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], 200
9996
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
10097
; CHECK: vector.body:
10198
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
102-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], [[OFFSET_EXT]]
103-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[TMP1]]
99+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]]
104100
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
105101
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 8
106102
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -233,13 +229,11 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1,
233229
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[OFFSET]], 1
234230
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
235231
; CHECK: vector.ph:
236-
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 200, [[OFFSET_EXT]]
237-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], [[TMP0]]
232+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], 200
238233
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
239234
; CHECK: vector.body:
240235
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
241-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], [[OFFSET_EXT]]
242-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[TMP1]]
236+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]]
243237
; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = trunc i64 [[INDEX]] to i32
244238
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX2]], 0
245239
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX2]], 1
@@ -414,26 +408,20 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
414408
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
415409
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
416410
; CHECK: vector.ph:
417-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
418-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
419-
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]]
420411
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
421412
; CHECK: vector.body:
422413
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
423-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[G_64]]
424-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[OFFSET_IDX]]
414+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
425415
; CHECK-NEXT: store <4 x i16> splat (i16 1), ptr [[TMP4]], align 2
426416
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
427-
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
428-
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
417+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
418+
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
429419
; CHECK: middle.block:
430-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
431-
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
420+
; CHECK-NEXT: br label [[EXIT:%.*]]
432421
; CHECK: scalar.ph:
433-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
434422
; CHECK-NEXT: br label [[LOOP:%.*]]
435423
; CHECK: loop:
436-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
424+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
437425
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
438426
; CHECK-NEXT: store i16 [[G_16]], ptr [[GEP]], align 2
439427
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], [[G_64]]

llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll

Lines changed: 6 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -14,16 +14,9 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
1414
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[XA]], -1
1515
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[SUB]] to i64
1616
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[XB]] to i64
17-
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], [[TMP0]]
18-
; CHECK-NEXT: [[SMAX7:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP2]], i64 32000)
19-
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 32000
20-
; CHECK-NEXT: [[UMIN8:%.*]] = zext i1 [[TMP3]] to i64
21-
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP2]], [[UMIN8]]
22-
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[SMAX7]], [[TMP4]]
23-
; CHECK-NEXT: [[UMAX9:%.*]] = tail call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
24-
; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 [[TMP5]], [[UMAX9]]
25-
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[UMIN8]]
26-
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 1
17+
; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP0]], i64 31999)
18+
; CHECK-NEXT: [[SMAX10:%.*]] = add nuw nsw i64 [[TMP2]], 1
19+
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[SMAX10]], [[TMP0]]
2720
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP8]], 23
2821
; CHECK-NEXT: [[IDENT_CHECK_NOT:%.*]] = icmp eq i32 [[XB]], 1
2922
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[MIN_ITERS_CHECK]], [[IDENT_CHECK_NOT]]
@@ -50,13 +43,11 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
5043
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER13]], label [[VECTOR_PH:%.*]]
5144
; CHECK: vector.ph:
5245
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP8]], -8
53-
; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[N_VEC]], [[TMP1]]
54-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP18]], [[TMP0]]
46+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], [[TMP0]]
5547
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
5648
; CHECK: vector.body:
5749
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
58-
; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[INDEX]], [[TMP1]]
59-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP19]], [[TMP0]]
50+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[INDEX]], [[TMP0]]
6051
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]]
6152
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 16
6253
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META0:![0-9]+]]
@@ -75,7 +66,7 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
7566
; CHECK: middle.block:
7667
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
7768
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER13]]
78-
; CHECK: for.body.preheader13:
69+
; CHECK: for.body.preheader14:
7970
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
8071
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
8172
; CHECK: for.body:

0 commit comments

Comments
 (0)