@@ -588,20 +588,19 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
588588; AVX2-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
589589; AVX2-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
590590; AVX2-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
591- ; AVX2-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
592- ; AVX2-FCP-NEXT: vpmovsxbd {{.*#+}} ymm1 = [5,7,1,3,7,0,0,0]
593- ; AVX2-FCP-NEXT: vpermd %ymm4, %ymm1, %ymm1
594- ; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5],zero,zero,zero,zero,zero,zero,ymm1[10,11,14,15,2,3,18,19],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
595- ; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
596- ; AVX2-FCP-NEXT: # ymm5 = mem[0,1,0,1]
597- ; AVX2-FCP-NEXT: vpermd %ymm0, %ymm5, %ymm5
598- ; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm5 = zero,zero,zero,zero,ymm5[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm5[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
599- ; AVX2-FCP-NEXT: vpor %ymm5, %ymm1, %ymm1
591+ ; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [1,5,3,7,1,5,3,7]
592+ ; AVX2-FCP-NEXT: # ymm1 = mem[0,1,0,1]
593+ ; AVX2-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
594+ ; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
595+ ; AVX2-FCP-NEXT: vpmovsxbd {{.*#+}} ymm5 = [5,7,1,3,7,0,0,0]
596+ ; AVX2-FCP-NEXT: vpermd %ymm4, %ymm5, %ymm5
597+ ; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm5 = ymm5[0,1,4,5],zero,zero,zero,zero,zero,zero,ymm5[10,11,14,15,2,3,18,19],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
598+ ; AVX2-FCP-NEXT: vpor %ymm1, %ymm5, %ymm1
600599; AVX2-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
601600; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,4,5,6,7,4,5],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[24,25,20,21,22,23,20,21,28,29]
602601; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm3 = ymm4[0,1,8,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm4[2,3],zero,zero,ymm4[18,19,26,27],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
603602; AVX2-FCP-NEXT: vpor %ymm2, %ymm3, %ymm2
604- ; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,2,4, 6,0,2,4 ,6]
603+ ; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,4,2, 6,0,4,2 ,6]
605604; AVX2-FCP-NEXT: # ymm3 = mem[0,1,0,1]
606605; AVX2-FCP-NEXT: vpermd %ymm0, %ymm3, %ymm0
607606; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u,u,u,u,u,u,u,18,19,22,23,26,27,u,u,u,u]
@@ -670,17 +669,16 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
670669; AVX512-FCP-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm4
671670; AVX512-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
672671; AVX512-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
673- ; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
674- ; AVX512-FCP-NEXT: vpermi2q %ymm5, %ymm0, %ymm1
675- ; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm0 = [0,2,4,6,0,2,4,6]
676- ; AVX512-FCP-NEXT: # ymm0 = mem[0,1,0,1]
677- ; AVX512-FCP-NEXT: vpermd %ymm1, %ymm0, %ymm0
678- ; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[18,19,22,23,26,27],zero,zero,zero,zero
679- ; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
672+ ; AVX512-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
673+ ; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,4,2,6,0,4,2,6]
674+ ; AVX512-FCP-NEXT: # ymm1 = mem[0,1,0,1]
675+ ; AVX512-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
676+ ; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero
677+ ; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,5,3,7,1,5,3,7]
680678; AVX512-FCP-NEXT: # ymm5 = mem[0,1,0,1]
681- ; AVX512-FCP-NEXT: vpermd %ymm1 , %ymm5, %ymm1
682- ; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1 [0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1 [18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
683- ; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm1 , %zmm0 , %zmm0
679+ ; AVX512-FCP-NEXT: vpermd %ymm0 , %ymm5, %ymm0
680+ ; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0 [0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0 [18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
681+ ; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm0 , %zmm1 , %zmm0
684682; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm4[0,1,8,9],zero,zero,zero,zero,ymm4[u,u,u,u,u,u,2,3],zero,zero,ymm4[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero
685683; AVX512-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
686684; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,u,u,u,u,u,u],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29]
@@ -753,17 +751,16 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
753751; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm4
754752; AVX512DQ-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
755753; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
756- ; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
757- ; AVX512DQ-FCP-NEXT: vpermi2q %ymm5, %ymm0, %ymm1
758- ; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm0 = [0,2,4,6,0,2,4,6]
759- ; AVX512DQ-FCP-NEXT: # ymm0 = mem[0,1,0,1]
760- ; AVX512DQ-FCP-NEXT: vpermd %ymm1, %ymm0, %ymm0
761- ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[18,19,22,23,26,27],zero,zero,zero,zero
762- ; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
754+ ; AVX512DQ-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
755+ ; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,4,2,6,0,4,2,6]
756+ ; AVX512DQ-FCP-NEXT: # ymm1 = mem[0,1,0,1]
757+ ; AVX512DQ-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
758+ ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero
759+ ; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,5,3,7,1,5,3,7]
763760; AVX512DQ-FCP-NEXT: # ymm5 = mem[0,1,0,1]
764- ; AVX512DQ-FCP-NEXT: vpermd %ymm1 , %ymm5, %ymm1
765- ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1 [0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1 [18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
766- ; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm1 , %zmm0 , %zmm0
761+ ; AVX512DQ-FCP-NEXT: vpermd %ymm0 , %ymm5, %ymm0
762+ ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0 [0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0 [18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
763+ ; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm0 , %zmm1 , %zmm0
767764; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm4[0,1,8,9],zero,zero,zero,zero,ymm4[u,u,u,u,u,u,2,3],zero,zero,ymm4[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero
768765; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
769766; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,u,u,u,u,u,u],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29]
0 commit comments