@@ -4808,16 +4808,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48084808; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48094809; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
48104810; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4811- ; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4812- ; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4813- ; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4814- ; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm0
4815- ; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
4816- ; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4817- ; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
4818- ; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4819- ; X64-KNL-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4820- ; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4811+ ; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4812+ ; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4813+ ; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
48214814; X64-KNL-NEXT: retq
48224815;
48234816; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4839,16 +4832,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48394832; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
48404833; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
48414834; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4842- ; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4843- ; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4844- ; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4845- ; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm0
4846- ; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
4847- ; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4848- ; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
4849- ; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4850- ; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4851- ; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4835+ ; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4836+ ; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4837+ ; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
48524838; X64-SKX-SMALL-NEXT: retq
48534839;
48544840; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4859,16 +4845,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48594845; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
48604846; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
48614847; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
4862- ; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4863- ; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4864- ; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4865- ; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm0
4866- ; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
4867- ; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4868- ; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
4869- ; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4870- ; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4871- ; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4848+ ; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4849+ ; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4850+ ; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
48724851; X64-SKX-LARGE-NEXT: retq
48734852;
48744853; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4898,16 +4877,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48984877; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48994878; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
49004879; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4901- ; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4902- ; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4903- ; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4904- ; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm0
4905- ; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
4906- ; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4907- ; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
4908- ; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4909- ; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4910- ; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4880+ ; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4881+ ; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4882+ ; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
49114883; X64-KNL-NEXT: retq
49124884;
49134885; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4929,16 +4901,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49294901; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
49304902; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
49314903; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4932- ; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4933- ; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4934- ; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4935- ; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm0
4936- ; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
4937- ; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4938- ; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
4939- ; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4940- ; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4941- ; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4904+ ; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4905+ ; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4906+ ; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
49424907; X64-SKX-SMALL-NEXT: retq
49434908;
49444909; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4949,16 +4914,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49494914; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
49504915; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
49514916; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
4952- ; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4953- ; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4954- ; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4955- ; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm0
4956- ; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
4957- ; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4958- ; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
4959- ; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4960- ; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4961- ; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4917+ ; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4918+ ; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4919+ ; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
49624920; X64-SKX-LARGE-NEXT: retq
49634921;
49644922; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4988,23 +4946,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
49884946; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
49894947; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
49904948; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4991- ; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4992- ; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4993- ; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4994- ; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm3
4995- ; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
4996- ; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
4997- ; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
4998- ; X64-KNL-NEXT: kmovw %k2, %k3
4999- ; X64-KNL-NEXT: vmovaps %ymm4, %ymm0
5000- ; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5001- ; X64-KNL-NEXT: vmovaps %ymm1, %ymm5
5002- ; X64-KNL-NEXT: kmovw %k1, %k3
5003- ; X64-KNL-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5004- ; X64-KNL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5005- ; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5006- ; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5007- ; X64-KNL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
4949+ ; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4950+ ; X64-KNL-NEXT: kmovw %k1, %k2
4951+ ; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
4952+ ; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4953+ ; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50084954; X64-KNL-NEXT: retq
50094955;
50104956; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_pair:
@@ -5028,23 +4974,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
50284974; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
50294975; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
50304976; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
5031- ; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5032- ; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
5033- ; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
5034- ; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm3
5035- ; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
5036- ; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
5037- ; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
5038- ; X64-SKX-SMALL-NEXT: kmovw %k2, %k3
5039- ; X64-SKX-SMALL-NEXT: vmovaps %ymm4, %ymm0
5040- ; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5041- ; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm5
5042- ; X64-SKX-SMALL-NEXT: kmovw %k1, %k3
5043- ; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5044- ; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5045- ; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5046- ; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5047- ; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
4977+ ; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4978+ ; X64-SKX-SMALL-NEXT: kmovw %k1, %k2
4979+ ; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
4980+ ; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4981+ ; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50484982; X64-SKX-SMALL-NEXT: retq
50494983;
50504984; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_pair:
@@ -5055,23 +4989,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
50554989; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
50564990; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
50574991; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
5058- ; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5059- ; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
5060- ; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
5061- ; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm3
5062- ; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
5063- ; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm4
5064- ; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
5065- ; X64-SKX-LARGE-NEXT: vmovaps %ymm4, %ymm0
5066- ; X64-SKX-LARGE-NEXT: kmovw %k2, %k3
5067- ; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5068- ; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm5
5069- ; X64-SKX-LARGE-NEXT: kmovw %k1, %k3
5070- ; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5071- ; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5072- ; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5073- ; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5074- ; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
4992+ ; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4993+ ; X64-SKX-LARGE-NEXT: kmovw %k1, %k2
4994+ ; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
4995+ ; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4996+ ; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50754997; X64-SKX-LARGE-NEXT: retq
50764998;
50774999; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_pair:
0 commit comments