@@ -1189,51 +1189,67 @@ define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
11891189define <4 x float > @fmul_insert_zero (<4 x float > %A , <4 x float > %B ) {
11901190; CHECK-LABEL: fmul_insert_zero:
11911191; CHECK: // %bb.0:
1192- ; CHECK-NEXT: mov.s v0[3], wzr
1192+ ; CHECK-NEXT: movi d2, #0000000000000000
1193+ ; CHECK-NEXT: mov.s v0[3], v2[0]
11931194; CHECK-NEXT: fmul.4s v0, v0, v1
11941195; CHECK-NEXT: ret
1195- %mul = fmul <4 x float > %A , %B
1196+ %mul = fmul fast <4 x float > %A , %B
11961197 %mul_set_lane = insertelement <4 x float > %mul , float 0 .000000e+00 , i64 3
11971198 ret <4 x float > %mul_set_lane
11981199}
11991200
12001201define <4 x float > @fmul_insert_zero_same (<4 x float > %A ) {
12011202; CHECK-LABEL: fmul_insert_zero_same:
12021203; CHECK: // %bb.0:
1203- ; CHECK-NEXT: mov.s v0[3], wzr
1204+ ; CHECK-NEXT: movi d1, #0000000000000000
1205+ ; CHECK-NEXT: mov.s v0[3], v1[0]
12041206; CHECK-NEXT: fmul.4s v0, v0, v0
12051207; CHECK-NEXT: ret
1206- %mul = fmul <4 x float > %A , %A
1208+ %mul = fmul fast <4 x float > %A , %A
12071209 %mul_set_lane = insertelement <4 x float > %mul , float 0 .000000e+00 , i64 3
12081210 ret <4 x float > %mul_set_lane
12091211}
12101212
12111213define <4 x float > @fmul_insert_zero1 (<4 x float > %A , <4 x float > %B , <4 x float > %C ) {
12121214; CHECK-LABEL: fmul_insert_zero1:
12131215; CHECK: // %bb.0:
1216+ ; CHECK-NEXT: movi d3, #0000000000000000
12141217; CHECK-NEXT: fsub.4s v0, v2, v0
1215- ; CHECK-NEXT: mov.s v1[3], wzr
1218+ ; CHECK-NEXT: mov.s v1[3], v3[0]
12161219; CHECK-NEXT: fmul.4s v0, v1, v0
12171220; CHECK-NEXT: ret
12181221 %sub = fsub <4 x float > %C , %A
1219- %mul = fmul <4 x float > %B , %sub
1222+ %mul = fmul fast <4 x float > %B , %sub
12201223 %mul_set_lane = insertelement <4 x float > %mul , float 0 .000000e+00 , i64 3
12211224 ret <4 x float > %mul_set_lane
12221225}
12231226
12241227define <4 x float > @fmul_insert_zero2 (<4 x float > %A , <4 x float > %B ) {
12251228; CHECK-LABEL: fmul_insert_zero2:
12261229; CHECK: // %bb.0:
1227- ; CHECK-NEXT: mov.s v0[3], wzr
1230+ ; CHECK-NEXT: movi d2, #0000000000000000
1231+ ; CHECK-NEXT: mov.s v0[3], v2[0]
12281232; CHECK-NEXT: fmul.4s v0, v0, v1
12291233; CHECK-NEXT: fsub.4s v0, v1, v0
12301234; CHECK-NEXT: ret
1231- %mul = fmul <4 x float > %B , %A
1235+ %mul = fmul fast <4 x float > %B , %A
12321236 %mul_set_lane = insertelement <4 x float > %mul , float 0 .000000e+00 , i64 3
12331237 %sub = fsub <4 x float > %B , %mul_set_lane
12341238 ret <4 x float > %sub
12351239}
12361240
1241+ define <4 x float > @fmul_insert_zero_nofast (<4 x float > %A , <4 x float > %B ) {
1242+ ; CHECK-LABEL: fmul_insert_zero_nofast:
1243+ ; CHECK: // %bb.0:
1244+ ; CHECK-NEXT: movi d2, #0000000000000000
1245+ ; CHECK-NEXT: fmul.4s v0, v0, v1
1246+ ; CHECK-NEXT: mov.s v0[3], v2[0]
1247+ ; CHECK-NEXT: ret
1248+ %mul = fmul <4 x float > %A , %B
1249+ %mul_set_lane = insertelement <4 x float > %mul , float 0 .000000e+00 , i64 3
1250+ ret <4 x float > %mul_set_lane
1251+ }
1252+
12371253define <2 x float > @fmulx_lane_2s (<2 x float > %A , <2 x float > %B ) nounwind {
12381254; CHECK-LABEL: fmulx_lane_2s:
12391255; CHECK: // %bb.0:
0 commit comments