@@ -8,46 +8,31 @@ define void @test_complex_add_float(ptr %res, ptr noalias %A, ptr noalias %B, i6
88; CHECK-LABEL: define void @test_complex_add_float(
99; CHECK-SAME: ptr [[RES:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
1010; CHECK-NEXT: [[ENTRY:.*:]]
11- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
11+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
1212; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1313; CHECK: [[VECTOR_PH]]:
14- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
14+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
1515; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1616; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1717; CHECK: [[VECTOR_BODY]]:
1818; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
19- ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
20- ; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[INDEX]]
21- ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[TMP1]]
22- ; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[INDEX]]
23- ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[TMP1]]
24- ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[GEP_A_0]], align 4
19+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[INDEX]]
20+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[INDEX]]
21+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[TMP0]], align 4
2522; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
2623; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
27- ; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <8 x float>, ptr [[TMP3 ]], align 4
24+ ; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <8 x float>, ptr [[TMP1 ]], align 4
2825; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x float> [[WIDE_VEC2]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
2926; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x float> [[WIDE_VEC2]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
30- ; CHECK-NEXT: [[WIDE_VEC5:%.*]] = load <8 x float>, ptr [[GEP_B_0]], align 4
31- ; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <8 x float> [[WIDE_VEC5]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
32- ; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <8 x float> [[WIDE_VEC5]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
33- ; CHECK-NEXT: [[WIDE_VEC8:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
34- ; CHECK-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <8 x float> [[WIDE_VEC8]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
35- ; CHECK-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <8 x float> [[WIDE_VEC8]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
36- ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[STRIDED_VEC]], [[STRIDED_VEC6]]
37- ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[STRIDED_VEC3]], [[STRIDED_VEC9]]
38- ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[STRIDED_VEC1]], [[STRIDED_VEC7]]
39- ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[STRIDED_VEC4]], [[STRIDED_VEC10]]
40- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 [[INDEX]]
41- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 [[TMP1]]
42- ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
43- ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
44- ; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4
45- ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
46- ; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
47- ; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC11]], ptr [[TMP11]], align 4
48- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
49- ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
50- ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
27+ ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[STRIDED_VEC]], [[STRIDED_VEC3]]
28+ ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[STRIDED_VEC1]], [[STRIDED_VEC4]]
29+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 [[INDEX]]
30+ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
31+ ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
32+ ; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4
33+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
34+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
35+ ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5136; CHECK: [[MIDDLE_BLOCK]]:
5237; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
5338; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
@@ -84,32 +69,24 @@ define void @test_complex_add_double(ptr %res, ptr noalias %A, ptr noalias %B, i
8469; CHECK-LABEL: define void @test_complex_add_double(
8570; CHECK-SAME: ptr [[RES:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
8671; CHECK-NEXT: [[ENTRY:.*:]]
87- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
72+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
8873; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
8974; CHECK: [[VECTOR_PH]]:
90- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
75+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
9176; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
9277; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
9378; CHECK: [[VECTOR_BODY]]:
9479; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
95- ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
96- ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[INDEX]]
97- ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[TMP1]]
98- ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[INDEX]]
99- ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[TMP1]]
100- ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x double>, ptr [[TMP2]], align 4
101- ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = load <2 x double>, ptr [[TMP3]], align 4
102- ; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = load <2 x double>, ptr [[TMP4]], align 4
103- ; CHECK-NEXT: [[STRIDED_VEC11:%.*]] = load <2 x double>, ptr [[TMP5]], align 4
104- ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[STRIDED_VEC1]], [[STRIDED_VEC7]]
105- ; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[STRIDED_VEC5]], [[STRIDED_VEC11]]
106- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[INDEX]]
107- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP1]]
108- ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 4
109- ; CHECK-NEXT: store <2 x double> [[TMP15]], ptr [[TMP11]], align 4
110- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
111- ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
112- ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
80+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[INDEX]]
81+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[INDEX]]
82+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4
83+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP1]], align 4
84+ ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD1]]
85+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[INDEX]]
86+ ; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[TMP3]], align 4
87+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
88+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
89+ ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
11390; CHECK: [[MIDDLE_BLOCK]]:
11491; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
11592; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
0 commit comments