@@ -28,14 +28,11 @@ define float @fmaxnum(ptr %src, i64 %n) {
2828; IC3-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD3]])
2929; IC3-NEXT: [[TMP5]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD4]])
3030; IC3-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 12
31- ; IC3-NEXT: [[TMP6:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
32- ; IC3-NEXT: [[TMP7:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD3]]
3331; IC3-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
34- ; IC3-NEXT: [[TMP9:%.*]] = freeze <4 x i1> [[TMP6]]
32+ ; IC3-NEXT: [[TMP7:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
3533; IC3-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP7]]
36- ; IC3-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP9]], [[TMP10]]
3734; IC3-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP8]]
38- ; IC3-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP11]], [[TMP12]]
35+ ; IC3-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
3936; IC3-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
4037; IC3-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4138; IC3-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
@@ -86,17 +83,11 @@ define float @fmaxnum(ptr %src, i64 %n) {
8683; IC4-NEXT: [[TMP6]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD5]])
8784; IC4-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI3]], <4 x float> [[WIDE_LOAD6]])
8885; IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
89- ; IC4-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
90- ; IC4-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
91- ; IC4-NEXT: [[TMP24:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
92- ; IC4-NEXT: [[TMP25:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
93- ; IC4-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP8]]
94- ; IC4-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP9]]
95- ; IC4-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
86+ ; IC4-NEXT: [[TMP24:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
87+ ; IC4-NEXT: [[TMP25:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD6]]
9688; IC4-NEXT: [[TMP26:%.*]] = freeze <4 x i1> [[TMP24]]
97- ; IC4-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP12]], [[TMP26]]
9889; IC4-NEXT: [[TMP28:%.*]] = freeze <4 x i1> [[TMP25]]
99- ; IC4-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP28]]
90+ ; IC4-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP26]], [[TMP28]]
10091; IC4-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP29]])
10192; IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
10293; IC4-NEXT: [[TMP15:%.*]] = or i1 [[TMP13]], [[TMP14]]
@@ -153,18 +144,12 @@ define float @fmaxnum(ptr %src, i64 %n) {
153144; IC5-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI3]], <4 x float> [[WIDE_LOAD7]])
154145; IC5-NEXT: [[TMP9]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI4]], <4 x float> [[WIDE_LOAD8]])
155146; IC5-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 20
156- ; IC5-NEXT: [[TMP10:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
157- ; IC5-NEXT: [[TMP11:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
158- ; IC5-NEXT: [[TMP12:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
159- ; IC5-NEXT: [[TMP13:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD7]]
160147; IC5-NEXT: [[TMP14:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD8]]
161- ; IC5-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP10]]
162- ; IC5-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP11]]
163- ; IC5-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP16]]
148+ ; IC5-NEXT: [[TMP12:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD5]]
149+ ; IC5-NEXT: [[TMP13:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD7]]
164150; IC5-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP12]]
165- ; IC5-NEXT: [[TMP19:%.*]] = or <4 x i1> [[TMP17]], [[TMP18]]
166151; IC5-NEXT: [[TMP20:%.*]] = freeze <4 x i1> [[TMP13]]
167- ; IC5-NEXT: [[TMP21:%.*]] = or <4 x i1> [[TMP19]], [[TMP20]]
152+ ; IC5-NEXT: [[TMP21:%.*]] = or <4 x i1> [[TMP18]], [[TMP20]]
168153; IC5-NEXT: [[TMP22:%.*]] = freeze <4 x i1> [[TMP14]]
169154; IC5-NEXT: [[TMP23:%.*]] = or <4 x i1> [[TMP21]], [[TMP22]]
170155; IC5-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]])
0 commit comments