Skip to content

Commit 806a797

Browse files
committed
[VPlan] Support plans with vector pointers in narrowInterleaveGroups.
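After narrowing interleave groups and the related memory operations, no VPVectorPointerRecipes should remain in the vector loop. Remove the check that bailed out of narrowing when a VPVectorPointerRecipe was present and the plan was unrolled (UF > 1), and instead assert after narrowing that the vector loop's entry block contains no VPVectorPointerRecipe. In preparation for llvm#149706.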
1 parent 69ccc39 commit 806a797

File tree

3 files changed

+40
-55
lines changed

3 files changed

+40
-55
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3642,14 +3642,6 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
36423642
if (R.mayWriteToMemory() && !InterleaveR)
36433643
return;
36443644

3645-
// Do not narrow interleave groups if there are VectorPointer recipes and
3646-
// the plan was unrolled. The recipe implicitly uses VF from
3647-
// VPTransformState.
3648-
// TODO: Remove restriction once the VF for the VectorPointer offset is
3649-
// modeled explicitly as operand.
3650-
if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1)
3651-
return;
3652-
36533645
// All other ops are allowed, but we reject uses that cannot be converted
36543646
// when checking all allowed consumers (store interleave groups) below.
36553647
if (!InterleaveR)
@@ -3780,6 +3772,9 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
37803772
Plan.getVF().replaceAllUsesWith(
37813773
Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
37823774
removeDeadRecipes(Plan);
3775+
assert(none_of(*VectorLoop->getEntryBasicBlock(),
3776+
IsaPred<VPVectorPointerRecipe>) &&
3777+
"All VPVectorPointerRecipes should have been removed");
37833778
}
37843779

37853780
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll

Lines changed: 25 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -326,46 +326,43 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr
326326
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
327327
; CHECK: [[VECTOR_BODY]]:
328328
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
329+
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 1
329330
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 2
330-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 4
331-
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 6
331+
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 3
332332
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]]
333-
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 2
334-
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 4
335-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 6
336-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP23]], align 8
337-
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x double>, ptr [[TMP25]], align 8
338-
; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <2 x double>, ptr [[TMP26]], align 8
339-
; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <2 x double>, ptr [[TMP27]], align 8
333+
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 8
334+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x double> poison, double [[TMP24]], i64 0
335+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT1]], <2 x double> poison, <2 x i32> zeroinitializer
336+
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP23]], align 8
337+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <2 x double> poison, double [[TMP25]], i64 0
338+
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT12]], <2 x double> poison, <2 x i32> zeroinitializer
339+
; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP23]], align 8
340+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP26]], i64 0
341+
; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
342+
; CHECK-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP23]], align 8
343+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <2 x double> poison, double [[TMP27]], i64 0
344+
; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT16]], <2 x double> poison, <2 x i32> zeroinitializer
340345
; CHECK-NEXT: [[TMP28:%.*]] = fmul <2 x double> [[WIDE_LOAD]], splat (double 5.000000e+00)
341346
; CHECK-NEXT: [[TMP29:%.*]] = fmul <2 x double> [[WIDE_LOAD12]], splat (double 5.000000e+00)
342347
; CHECK-NEXT: [[TMP30:%.*]] = fmul <2 x double> [[WIDE_LOAD13]], splat (double 5.000000e+00)
343348
; CHECK-NEXT: [[TMP31:%.*]] = fmul <2 x double> [[WIDE_LOAD14]], splat (double 5.000000e+00)
344349
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
345-
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[TMP20]]
346350
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[TMP21]]
351+
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[TMP20]]
347352
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[TMP22]]
348-
; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <2 x double> [[TMP28]], <2 x double> [[TMP28]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
349-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP36]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
350-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP32]], align 8
351-
; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <2 x double> [[TMP29]], <2 x double> [[TMP29]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
352-
; CHECK-NEXT: [[INTERLEAVED_VEC15:%.*]] = shufflevector <4 x double> [[TMP37]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
353-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC15]], ptr [[TMP33]], align 8
354-
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <2 x double> [[TMP30]], <2 x double> [[TMP30]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
355-
; CHECK-NEXT: [[INTERLEAVED_VEC16:%.*]] = shufflevector <4 x double> [[TMP38]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
356-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC16]], ptr [[TMP34]], align 8
357-
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> [[TMP31]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
358-
; CHECK-NEXT: [[INTERLEAVED_VEC17:%.*]] = shufflevector <4 x double> [[TMP39]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
359-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC17]], ptr [[TMP35]], align 8
353+
; CHECK-NEXT: store <2 x double> [[TMP28]], ptr [[TMP32]], align 8
354+
; CHECK-NEXT: store <2 x double> [[TMP29]], ptr [[TMP34]], align 8
355+
; CHECK-NEXT: store <2 x double> [[TMP30]], ptr [[TMP36]], align 8
356+
; CHECK-NEXT: store <2 x double> [[TMP31]], ptr [[TMP35]], align 8
360357
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
361-
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[TMP20]]
362358
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[TMP21]]
359+
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[TMP20]]
363360
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[TMP22]]
364-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP40]], align 8
365-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC15]], ptr [[TMP41]], align 8
366-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC16]], ptr [[TMP42]], align 8
367-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC17]], ptr [[TMP43]], align 8
368-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
361+
; CHECK-NEXT: store <2 x double> [[TMP28]], ptr [[TMP40]], align 8
362+
; CHECK-NEXT: store <2 x double> [[TMP29]], ptr [[TMP42]], align 8
363+
; CHECK-NEXT: store <2 x double> [[TMP30]], ptr [[TMP38]], align 8
364+
; CHECK-NEXT: store <2 x double> [[TMP31]], ptr [[TMP43]], align 8
365+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
369366
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
370367
; CHECK-NEXT: br i1 [[TMP44]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
371368
; CHECK: [[MIDDLE_BLOCK]]:

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll

Lines changed: 12 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -75,32 +75,25 @@ define void @test_2xi64_with_wide_load(ptr noalias %data, ptr noalias %factor) {
7575
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
7676
; CHECK: [[VECTOR_BODY]]:
7777
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
78-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 2
78+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
7979
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
80-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2
81-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
82-
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
80+
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
81+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
82+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
83+
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP1]], align 8
84+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
85+
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
8386
; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1
8487
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP0]], 1
8588
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
8689
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP7]]
87-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
88-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
89-
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
90-
; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
91-
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC3]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
92-
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <4 x i64> [[WIDE_VEC3]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
93-
; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
94-
; CHECK-NEXT: [[TMP11:%.*]] = mul <2 x i64> [[BROADCAST_SPLAT3]], [[WIDE_LOAD1]]
90+
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
91+
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
9592
; CHECK-NEXT: [[TMP15:%.*]] = mul <2 x i64> [[BROADCAST_SPLAT]], [[STRIDED_VEC2]]
9693
; CHECK-NEXT: [[TMP16:%.*]] = mul <2 x i64> [[BROADCAST_SPLAT3]], [[STRIDED_VEC5]]
97-
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
98-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
99-
; CHECK-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
100-
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i64> [[TMP11]], <2 x i64> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
101-
; CHECK-NEXT: [[INTERLEAVED_VEC6:%.*]] = shufflevector <4 x i64> [[TMP18]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
102-
; CHECK-NEXT: store <4 x i64> [[INTERLEAVED_VEC6]], ptr [[TMP9]], align 8
103-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
94+
; CHECK-NEXT: store <2 x i64> [[TMP15]], ptr [[TMP8]], align 8
95+
; CHECK-NEXT: store <2 x i64> [[TMP16]], ptr [[TMP9]], align 8
96+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
10497
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
10598
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
10699
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)