@@ -14,20 +14,16 @@ define void @test_2xi64_mul_add(ptr noalias %data, ptr noalias %factor) {
1414; VF2: [[VECTOR_BODY]]:
1515; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1616; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
17- ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
17+ ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP0]], align 8
18+ ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
19+ ; VF2-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
1820; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
1921; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
20- ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
21- ; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
22- ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
23- ; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
24- ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
22+ ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
2523; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
2624; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2)
27- ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
28- ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
29- ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
30- ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
25+ ; VF2-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP2]], align 8
26+ ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
3127; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
3228; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3329; VF2: [[MIDDLE_BLOCK]]:
@@ -188,20 +184,16 @@ define void @test_2xi64_mul_sub(ptr noalias %data, ptr noalias %factor) {
188184; VF2: [[VECTOR_BODY]]:
189185; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
190186; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
191- ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
187+ ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP0]], align 8
188+ ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
189+ ; VF2-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
192190; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
193191; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
194- ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
195- ; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
196- ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
197- ; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
198- ; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2)
192+ ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
199193; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
200194; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 2)
201- ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
202- ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
203- ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
204- ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
195+ ; VF2-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP2]], align 8
196+ ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
205197; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
206198; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
207199; VF2: [[MIDDLE_BLOCK]]:
@@ -420,22 +412,17 @@ define void @test_2xi64_mul_add_xor(ptr noalias %data, ptr noalias %factor) {
420412; VF2: [[VECTOR_BODY]]:
421413; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
422414; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
423- ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
415+ ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP0]], align 8
416+ ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
417+ ; VF2-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
424418; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
425419; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
426- ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
427- ; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
428- ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
429- ; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
430- ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
431- ; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]]
420+ ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
432421; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
433422; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2)
434423; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]]
435- ; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
436- ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
437- ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
438- ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
424+ ; VF2-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP2]], align 8
425+ ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
439426; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
440427; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
441428; VF2: [[MIDDLE_BLOCK]]:
0 commit comments