Skip to content

Commit 2bec3bb

Browse files
fhahn and vbuka
authored and committed
[VPlan] Limit narrowInterleaveGroups to single block regions for now.
Currently only regions with a single block are supported by the legality checks.
1 parent 122d849 commit 2bec3bb

File tree

2 files changed

+46
-30
lines changed

2 files changed

+46
-30
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4119,7 +4119,7 @@ static bool isAlreadyNarrow(VPValue *VPV) {
4119 4119
void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
4120 4120
unsigned VectorRegWidth) {
4121 4121
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
4122-
if (!VectorLoop)
4122+
if (!VectorLoop || VectorLoop->getEntry()->getNumSuccessors() != 0)
4123 4123
return;
4124 4124

4125 4125
VPTypeAnalysis TypeInfo(Plan);

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll

Lines changed: 45 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -13,24 +13,28 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr
1313
; VF2IC1: [[VECTOR_PH]]:
1414
; VF2IC1-NEXT: br label %[[VECTOR_BODY:.*]]
1515
; VF2IC1: [[VECTOR_BODY]]:
16-
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
16+
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
1717
; VF2IC1-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
1818
; VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
19-
; VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
20-
; VF2IC1-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8
19+
; VF2IC1-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
20+
; VF2IC1-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
21+
; VF2IC1-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
22+
; VF2IC1-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
23+
; VF2IC1-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
24+
; VF2IC1-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
2125
; VF2IC1-NEXT: br i1 [[C]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
2226
; VF2IC1: [[PRED_STORE_IF]]:
2327
; VF2IC1-NEXT: store i8 1, ptr [[DST_0]], align 1
2428
; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE]]
2529
; VF2IC1: [[PRED_STORE_CONTINUE]]:
26-
; VF2IC1-NEXT: br i1 [[C]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
27-
; VF2IC1: [[PRED_STORE_IF1]]:
30+
; VF2IC1-NEXT: br i1 [[C]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
31+
; VF2IC1: [[PRED_STORE_IF2]]:
2832
; VF2IC1-NEXT: store i8 1, ptr [[DST_0]], align 1
29-
; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
30-
; VF2IC1: [[PRED_STORE_CONTINUE2]]:
33+
; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE3]]
34+
; VF2IC1: [[PRED_STORE_CONTINUE3]]:
3135
; VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]]
3236
; VF2IC1-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1
33-
; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
37+
; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3438
; VF2IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
3539
; VF2IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3640
; VF2IC1: [[MIDDLE_BLOCK]]:
@@ -45,41 +49,49 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr
4549
; VF2IC2: [[VECTOR_PH]]:
4650
; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
4751
; VF2IC2: [[VECTOR_BODY]]:
48-
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
49-
; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
52+
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
53+
; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 2
5054
; VF2IC2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
5155
; VF2IC2-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP0]], 1
5256
; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
5357
; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP2]]
54-
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
55-
; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
56-
; VF2IC2-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP3]], align 8
57-
; VF2IC2-NEXT: store <2 x i64> [[WIDE_LOAD1]], ptr [[TMP4]], align 8
58+
; VF2IC2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
59+
; VF2IC2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
60+
; VF2IC2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
61+
; VF2IC2-NEXT: [[WIDE_VEC2:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
62+
; VF2IC2-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x i64> [[WIDE_VEC2]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
63+
; VF2IC2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x i64> [[WIDE_VEC2]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
64+
; VF2IC2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
65+
; VF2IC2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
66+
; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
67+
; VF2IC2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC3]], <2 x i64> [[STRIDED_VEC4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
68+
; VF2IC2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
69+
; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC5]], ptr [[TMP4]], align 8
5870
; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
5971
; VF2IC2: [[PRED_STORE_IF]]:
6072
; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
6173
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
6274
; VF2IC2: [[PRED_STORE_CONTINUE]]:
63-
; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
64-
; VF2IC2: [[PRED_STORE_IF2]]:
65-
; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
66-
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE3]]
67-
; VF2IC2: [[PRED_STORE_CONTINUE3]]:
68-
; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
69-
; VF2IC2: [[PRED_STORE_IF4]]:
70-
; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
71-
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE5]]
72-
; VF2IC2: [[PRED_STORE_CONTINUE5]]:
73-
; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
75+
; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
7476
; VF2IC2: [[PRED_STORE_IF6]]:
7577
; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
7678
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE7]]
7779
; VF2IC2: [[PRED_STORE_CONTINUE7]]:
80+
; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
81+
; VF2IC2: [[PRED_STORE_IF8]]:
82+
; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
83+
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE9]]
84+
; VF2IC2: [[PRED_STORE_CONTINUE9]]:
85+
; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]]
86+
; VF2IC2: [[PRED_STORE_IF10]]:
87+
; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
88+
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE11]]
89+
; VF2IC2: [[PRED_STORE_CONTINUE11]]:
7890
; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]]
7991
; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 2
8092
; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP5]], align 1
8193
; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP6]], align 1
82-
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
94+
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
8395
; VF2IC2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
8496
; VF2IC2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
8597
; VF2IC2: [[MIDDLE_BLOCK]]:
@@ -128,8 +140,12 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
128140
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
129141
; VF2IC1-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
130142
; VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
131-
; VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
132-
; VF2IC1-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8
143+
; VF2IC1-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
144+
; VF2IC1-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
145+
; VF2IC1-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
146+
; VF2IC1-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
147+
; VF2IC1-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
148+
; VF2IC1-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
133149
; VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[INDEX]]
134150
; VF2IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1
135151
; VF2IC1-NEXT: [[TMP3:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD1]], zeroinitializer
@@ -149,7 +165,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
149165
; VF2IC1-NEXT: store i8 1, ptr [[TMP9]], align 1
150166
; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE3]]
151167
; VF2IC1: [[PRED_STORE_CONTINUE3]]:
152-
; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
168+
; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
153169
; VF2IC1-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
154170
; VF2IC1-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
155171
; VF2IC1: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)