@@ -698,7 +698,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
698698;
699699; AVX2-LABEL: stest_f16i32:
700700; AVX2: # %bb.0: # %entry
701- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
701+ ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
702702; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
703703; AVX2-NEXT: vcvttss2si %xmm1, %rax
704704; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@@ -709,7 +709,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
709709; AVX2-NEXT: vcvttss2si %xmm1, %rax
710710; AVX2-NEXT: vmovq %rcx, %xmm1
711711; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
712- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
712+ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
713713; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
714714; AVX2-NEXT: vmovq %rax, %xmm2
715715; AVX2-NEXT: vcvttss2si %xmm0, %rax
@@ -836,7 +836,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
836836;
837837; AVX2-LABEL: utesth_f16i32:
838838; AVX2: # %bb.0: # %entry
839- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
839+ ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
840840; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
841841; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
842842; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
@@ -866,7 +866,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
866866; AVX2-NEXT: sarq $63, %rdx
867867; AVX2-NEXT: andq %rax, %rdx
868868; AVX2-NEXT: orq %rcx, %rdx
869- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
869+ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
870870; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
871871; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
872872; AVX2-NEXT: vcvttss2si %xmm1, %rax
@@ -999,7 +999,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
999999;
10001000; AVX2-LABEL: ustest_f16i32:
10011001; AVX2: # %bb.0: # %entry
1002- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
1002+ ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
10031003; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
10041004; AVX2-NEXT: vcvttss2si %xmm1, %rax
10051005; AVX2-NEXT: vmovq %rax, %xmm1
@@ -1011,7 +1011,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
10111011; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
10121012; AVX2-NEXT: vcvttss2si %xmm2, %rax
10131013; AVX2-NEXT: vmovq %rax, %xmm2
1014- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1014+ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
10151015; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
10161016; AVX2-NEXT: vcvttss2si %xmm0, %rax
10171017; AVX2-NEXT: vmovq %rax, %xmm0
@@ -3310,7 +3310,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
33103310;
33113311; AVX2-LABEL: stest_f16i32_mm:
33123312; AVX2: # %bb.0: # %entry
3313- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
3313+ ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
33143314; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
33153315; AVX2-NEXT: vcvttss2si %xmm1, %rax
33163316; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@@ -3321,7 +3321,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
33213321; AVX2-NEXT: vcvttss2si %xmm1, %rax
33223322; AVX2-NEXT: vmovq %rcx, %xmm1
33233323; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3324- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3324+ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
33253325; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
33263326; AVX2-NEXT: vmovq %rax, %xmm2
33273327; AVX2-NEXT: vcvttss2si %xmm0, %rax
@@ -3446,7 +3446,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
34463446;
34473447; AVX2-LABEL: utesth_f16i32_mm:
34483448; AVX2: # %bb.0: # %entry
3449- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
3449+ ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
34503450; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
34513451; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
34523452; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
@@ -3476,7 +3476,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
34763476; AVX2-NEXT: sarq $63, %rdx
34773477; AVX2-NEXT: andq %rax, %rdx
34783478; AVX2-NEXT: orq %rcx, %rdx
3479- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3479+ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
34803480; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
34813481; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
34823482; AVX2-NEXT: vcvttss2si %xmm1, %rax
@@ -3608,7 +3608,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
36083608;
36093609; AVX2-LABEL: ustest_f16i32_mm:
36103610; AVX2: # %bb.0: # %entry
3611- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
3611+ ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
36123612; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
36133613; AVX2-NEXT: vcvttss2si %xmm1, %rax
36143614; AVX2-NEXT: vmovq %rax, %xmm1
@@ -3620,7 +3620,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
36203620; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
36213621; AVX2-NEXT: vcvttss2si %xmm2, %rax
36223622; AVX2-NEXT: vmovq %rax, %xmm2
3623- ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3623+ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
36243624; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
36253625; AVX2-NEXT: vcvttss2si %xmm0, %rax
36263626; AVX2-NEXT: vmovq %rax, %xmm0
0 commit comments