@@ -130,14 +130,10 @@ define void @frem_v16f32(<16 x float> %a0, <16 x float> %a1, ptr%p3) nounwind {
130130; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
131131; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
132132; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
133- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
134- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
135- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
136- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
137- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
133+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
134+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
135+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
138136; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
139- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
140- ; CHECK-NEXT: vzeroupper
141137; CHECK-NEXT: callq fmodf@PLT
142138; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
143139; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -195,14 +191,10 @@ define void @frem_v16f32(<16 x float> %a0, <16 x float> %a1, ptr%p3) nounwind {
195191; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
196192; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
197193; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
198- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
199- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
200- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
201- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
202- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
194+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
195+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
196+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
203197; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
204- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
205- ; CHECK-NEXT: vzeroupper
206198; CHECK-NEXT: callq fmodf@PLT
207199; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
208200; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -280,14 +272,10 @@ define void @frem_v8f32(<8 x float> %a0, <8 x float> %a1, ptr%p3) nounwind {
280272; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
281273; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
282274; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
283- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
284- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
285- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
286- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
287- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
275+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
276+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
277+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
288278; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
289- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
290- ; CHECK-NEXT: vzeroupper
291279; CHECK-NEXT: callq fmodf@PLT
292280; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
293281; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -389,14 +377,10 @@ define void @frem_v8f64(<8 x double> %a0, <8 x double> %a1, ptr%p3) nounwind {
389377; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
390378; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
391379; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
392- ; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
393- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
394- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
395- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
396- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
380+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
381+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
382+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
397383; CHECK-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill
398- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
399- ; CHECK-NEXT: vzeroupper
400384; CHECK-NEXT: callq fmod@PLT
401385; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
402386; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -422,14 +406,10 @@ define void @frem_v8f64(<8 x double> %a0, <8 x double> %a1, ptr%p3) nounwind {
422406; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
423407; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
424408; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
425- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
426- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
427- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
428- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
429- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
409+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
410+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
411+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
430412; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
431- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
432- ; CHECK-NEXT: vzeroupper
433413; CHECK-NEXT: callq fmod@PLT
434414; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
435415; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -475,14 +455,10 @@ define void @frem_v4f64(<4 x double> %a0, <4 x double> %a1, ptr%p3) nounwind {
475455; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
476456; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
477457; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
478- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
479- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
480- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
481- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
482- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
458+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
459+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
460+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
483461; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
484- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
485- ; CHECK-NEXT: vzeroupper
486462; CHECK-NEXT: callq fmod@PLT
487463; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
488464; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -545,11 +521,9 @@ define void @frem_v32f16(<32 x half> %a0, <32 x half> %a1, ptr%p3) nounwind {
545521; CHECK-NEXT: vzeroupper
546522; CHECK-NEXT: callq __extendhfsf2@PLT
547523; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill
548- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
549- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
524+ ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
550525; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
551526; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
552- ; CHECK-NEXT: vzeroupper
553527; CHECK-NEXT: callq __extendhfsf2@PLT
554528; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload
555529; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
@@ -773,18 +747,15 @@ define void @frem_v32f16(<32 x half> %a0, <32 x half> %a1, ptr%p3) nounwind {
773747; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
774748; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
775749; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
776- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
777- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
750+ ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
778751; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
779752; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
780753; CHECK-NEXT: vzeroupper
781754; CHECK-NEXT: callq __extendhfsf2@PLT
782755; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
783- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
784- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
756+ ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
785757; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
786758; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
787- ; CHECK-NEXT: vzeroupper
788759; CHECK-NEXT: callq __extendhfsf2@PLT
789760; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
790761; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
@@ -1033,11 +1004,9 @@ define void @frem_v16f16(<16 x half> %a0, <16 x half> %a1, ptr%p3) nounwind {
10331004; CHECK-NEXT: vzeroupper
10341005; CHECK-NEXT: callq __extendhfsf2@PLT
10351006; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1036- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1037- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1007+ ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
10381008; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
10391009; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1040- ; CHECK-NEXT: vzeroupper
10411010; CHECK-NEXT: callq __extendhfsf2@PLT
10421011; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
10431012; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
0 commit comments