@@ -13,24 +13,28 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr
1313; VF2IC1: [[VECTOR_PH]]:
1414; VF2IC1-NEXT: br label %[[VECTOR_BODY:.*]]
1515; VF2IC1: [[VECTOR_BODY]]:
16- ; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2 :.*]] ]
16+ ; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3 :.*]] ]
1717; VF2IC1-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
1818; VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
19- ; VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
20- ; VF2IC1-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8
19+ ; VF2IC1-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
20+ ; VF2IC1-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
21+ ; VF2IC1-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
22+ ; VF2IC1-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
23+ ; VF2IC1-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
24+ ; VF2IC1-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
2125; VF2IC1-NEXT: br i1 [[C]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
2226; VF2IC1: [[PRED_STORE_IF]]:
2327; VF2IC1-NEXT: store i8 1, ptr [[DST_0]], align 1
2428; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE]]
2529; VF2IC1: [[PRED_STORE_CONTINUE]]:
26- ; VF2IC1-NEXT: br i1 [[C]], label %[[PRED_STORE_IF1 :.*]], label %[[PRED_STORE_CONTINUE2 ]]
27- ; VF2IC1: [[PRED_STORE_IF1 ]]:
30+ ; VF2IC1-NEXT: br i1 [[C]], label %[[PRED_STORE_IF2 :.*]], label %[[PRED_STORE_CONTINUE3 ]]
31+ ; VF2IC1: [[PRED_STORE_IF2 ]]:
2832; VF2IC1-NEXT: store i8 1, ptr [[DST_0]], align 1
29- ; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE2 ]]
30- ; VF2IC1: [[PRED_STORE_CONTINUE2 ]]:
33+ ; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE3 ]]
34+ ; VF2IC1: [[PRED_STORE_CONTINUE3 ]]:
3135; VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]]
3236; VF2IC1-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1
33- ; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
37+ ; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3438; VF2IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
3539; VF2IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3640; VF2IC1: [[MIDDLE_BLOCK]]:
@@ -45,41 +49,49 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr
4549; VF2IC2: [[VECTOR_PH]]:
4650; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
4751; VF2IC2: [[VECTOR_BODY]]:
48- ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7 :.*]] ]
49- ; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
52+ ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11 :.*]] ]
53+ ; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 2
5054; VF2IC2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
5155; VF2IC2-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP0]], 1
5256; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
5357; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP2]]
54- ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
55- ; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
56- ; VF2IC2-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP3]], align 8
57- ; VF2IC2-NEXT: store <2 x i64> [[WIDE_LOAD1]], ptr [[TMP4]], align 8
58+ ; VF2IC2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
59+ ; VF2IC2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
60+ ; VF2IC2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
61+ ; VF2IC2-NEXT: [[WIDE_VEC2:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
62+ ; VF2IC2-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x i64> [[WIDE_VEC2]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
63+ ; VF2IC2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x i64> [[WIDE_VEC2]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
64+ ; VF2IC2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
65+ ; VF2IC2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
66+ ; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
67+ ; VF2IC2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC3]], <2 x i64> [[STRIDED_VEC4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
68+ ; VF2IC2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
69+ ; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC5]], ptr [[TMP4]], align 8
5870; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
5971; VF2IC2: [[PRED_STORE_IF]]:
6072; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
6173; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
6274; VF2IC2: [[PRED_STORE_CONTINUE]]:
63- ; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
64- ; VF2IC2: [[PRED_STORE_IF2]]:
65- ; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
66- ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE3]]
67- ; VF2IC2: [[PRED_STORE_CONTINUE3]]:
68- ; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
69- ; VF2IC2: [[PRED_STORE_IF4]]:
70- ; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
71- ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE5]]
72- ; VF2IC2: [[PRED_STORE_CONTINUE5]]:
73- ; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
75+ ; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
7476; VF2IC2: [[PRED_STORE_IF6]]:
7577; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
7678; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE7]]
7779; VF2IC2: [[PRED_STORE_CONTINUE7]]:
80+ ; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
81+ ; VF2IC2: [[PRED_STORE_IF8]]:
82+ ; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
83+ ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE9]]
84+ ; VF2IC2: [[PRED_STORE_CONTINUE9]]:
85+ ; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]]
86+ ; VF2IC2: [[PRED_STORE_IF10]]:
87+ ; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1
88+ ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE11]]
89+ ; VF2IC2: [[PRED_STORE_CONTINUE11]]:
7890; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]]
7991; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 2
8092; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP5]], align 1
8193; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP6]], align 1
82- ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
94+ ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
8395; VF2IC2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
8496; VF2IC2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
8597; VF2IC2: [[MIDDLE_BLOCK]]:
@@ -128,8 +140,12 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
128140; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
129141; VF2IC1-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
130142; VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
131- ; VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
132- ; VF2IC1-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8
143+ ; VF2IC1-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
144+ ; VF2IC1-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
145+ ; VF2IC1-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
146+ ; VF2IC1-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
147+ ; VF2IC1-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
148+ ; VF2IC1-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
133149; VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[INDEX]]
134150; VF2IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1
135151; VF2IC1-NEXT: [[TMP3:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD1]], zeroinitializer
@@ -149,7 +165,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
149165; VF2IC1-NEXT: store i8 1, ptr [[TMP9]], align 1
150166; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE3]]
151167; VF2IC1: [[PRED_STORE_CONTINUE3]]:
152- ; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
168+ ; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
153169; VF2IC1-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
154170; VF2IC1-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
155171; VF2IC1: [[MIDDLE_BLOCK]]:
0 commit comments