@@ -31,7 +31,7 @@ define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX-NEXT: vcvttss2si %xmm1, %rax
 ; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -52,7 +52,7 @@ define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
 define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; AVX-LABEL: llrint_v4i64_v4f16:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -73,7 +73,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm2, %xmm2
 ; AVX-NEXT: vcvttss2si %xmm2, %rax
 ; AVX-NEXT: vmovq %rax, %xmm2
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -95,7 +95,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; AVX-LABEL: llrint_v8i64_v8f16:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -116,7 +116,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm2, %xmm2
 ; AVX-NEXT: vcvttss2si %xmm2, %rax
 ; AVX-NEXT: vmovq %rax, %xmm2
-; AVX-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm3
 ; AVX-NEXT: vcvtph2ps %xmm3, %xmm3
 ; AVX-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; AVX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -171,7 +171,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; AVX-LABEL: llrint_v16i64_v16f16:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovdqa %ymm0, %ymm2
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm2[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm2, %xmm0
 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -192,7 +192,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX-NEXT: vcvttss2si %xmm1, %rax
 ; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm2, %xmm3
 ; AVX-NEXT: vcvtph2ps %xmm3, %xmm3
 ; AVX-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; AVX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -233,7 +233,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; AVX-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
 ; AVX-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm3[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm3, %xmm2
 ; AVX-NEXT: vcvtph2ps %xmm2, %xmm2
 ; AVX-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; AVX-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -254,7 +254,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm4, %xmm4
 ; AVX-NEXT: vcvttss2si %xmm4, %rax
 ; AVX-NEXT: vmovq %rax, %xmm4
-; AVX-NEXT: vpshuflw {{.*#+}} xmm5 = xmm3[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm3, %xmm5
 ; AVX-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; AVX-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -348,15 +348,15 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm4, %xmm4
 ; AVX-NEXT: vcvttss2si %xmm4, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm4
-; AVX-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm1, %xmm5
 ; AVX-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; AVX-NEXT: vcvtps2ph $4, %xmm5, %xmm5
 ; AVX-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX-NEXT: vcvttss2si %xmm5, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm5
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm1, %xmm5
 ; AVX-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; AVX-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -408,15 +408,15 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm7, %xmm7
 ; AVX-NEXT: vcvttss2si %xmm7, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm7
-; AVX-NEXT: vpshuflw {{.*#+}} xmm9 = xmm8[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm8, %xmm9
 ; AVX-NEXT: vcvtph2ps %xmm9, %xmm9
 ; AVX-NEXT: vroundss $4, %xmm9, %xmm9, %xmm9
 ; AVX-NEXT: vcvtps2ph $4, %xmm9, %xmm9
 ; AVX-NEXT: vcvtph2ps %xmm9, %xmm9
 ; AVX-NEXT: vcvttss2si %xmm9, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm9
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm9[0]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm9 = xmm8[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm8, %xmm9
 ; AVX-NEXT: vcvtph2ps %xmm9, %xmm9
 ; AVX-NEXT: vroundss $4, %xmm9, %xmm9, %xmm9
 ; AVX-NEXT: vcvtps2ph $4, %xmm9, %xmm9
@@ -467,15 +467,15 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm11, %xmm11
 ; AVX-NEXT: vcvttss2si %xmm11, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm11
-; AVX-NEXT: vpshuflw {{.*#+}} xmm12 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm12
 ; AVX-NEXT: vcvtph2ps %xmm12, %xmm12
 ; AVX-NEXT: vroundss $4, %xmm12, %xmm12, %xmm12
 ; AVX-NEXT: vcvtps2ph $4, %xmm12, %xmm12
 ; AVX-NEXT: vcvtph2ps %xmm12, %xmm12
 ; AVX-NEXT: vcvttss2si %xmm12, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm12
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm11 = xmm11[0],xmm12[0]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm12 = xmm0[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm12
 ; AVX-NEXT: vcvtph2ps %xmm12, %xmm12
 ; AVX-NEXT: vroundss $4, %xmm12, %xmm12, %xmm12
 ; AVX-NEXT: vcvtps2ph $4, %xmm12, %xmm12
@@ -526,7 +526,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvtps2ph $4, %xmm15, %xmm15
 ; AVX-NEXT: vcvtph2ps %xmm15, %xmm15
 ; AVX-NEXT: vcvttss2si %xmm15, %rcx
-; AVX-NEXT: vpshuflw {{.*#+}} xmm15 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm15
 ; AVX-NEXT: vcvtph2ps %xmm15, %xmm15
 ; AVX-NEXT: vroundss $4, %xmm15, %xmm15, %xmm15
 ; AVX-NEXT: vcvtps2ph $4, %xmm15, %xmm15
@@ -535,7 +535,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vmovq %rcx, %xmm15
 ; AVX-NEXT: vmovq %rdx, %xmm2
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm15[0],xmm2[0]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm15 = xmm0[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm15
 ; AVX-NEXT: vcvtph2ps %xmm15, %xmm15
 ; AVX-NEXT: vroundss $4, %xmm15, %xmm15, %xmm15
 ; AVX-NEXT: vcvtps2ph $4, %xmm15, %xmm15