@@ -4805,9 +4805,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48054805; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
48064806; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
48074807; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
4808- ; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4809- ; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4810- ; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4808+ ; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm0
4809+ ; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
48114810; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
48124811; X64-KNL-NEXT: retq
48134812;
@@ -4818,9 +4817,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48184817; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48194818; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
48204819; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
4821- ; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4822- ; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4823- ; X86-KNL-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
4820+ ; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm0
4821+ ; X86-KNL-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
48244822; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
48254823; X86-KNL-NEXT: retl
48264824;
@@ -4829,9 +4827,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48294827; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
48304828; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
48314829; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4832- ; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4833- ; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4834- ; X64-SKX-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4830+ ; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm0
4831+ ; X64-SKX-NEXT: vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
48354832; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
48364833; X64-SKX-NEXT: retq
48374834;
@@ -4842,9 +4839,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48424839; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
48434840; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
48444841; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
4845- ; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4846- ; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4847- ; X86-SKX-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
4842+ ; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm0
4843+ ; X86-SKX-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
48484844; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
48494845; X86-SKX-NEXT: retl
48504846 %wide.load = load <16 x i32 >, ptr %arr , align 4
@@ -4861,9 +4857,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48614857; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
48624858; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
48634859; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
4864- ; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4865- ; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4866- ; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4860+ ; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm0
4861+ ; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0), %zmm1 {%k1}
48674862; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
48684863; X64-KNL-NEXT: retq
48694864;
@@ -4874,9 +4869,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48744869; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48754870; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
48764871; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
4877- ; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4878- ; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4879- ; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
4872+ ; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm0
4873+ ; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
48804874; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
48814875; X86-KNL-NEXT: retl
48824876;
@@ -4885,9 +4879,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48854879; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
48864880; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
48874881; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4888- ; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4889- ; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4890- ; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4882+ ; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm0
4883+ ; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm0), %zmm1 {%k1}
48914884; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
48924885; X64-SKX-NEXT: retq
48934886;
@@ -4898,9 +4891,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48984891; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
48994892; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
49004893; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
4901- ; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4902- ; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4903- ; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
4894+ ; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm0
4895+ ; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
49044896; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
49054897; X86-SKX-NEXT: retl
49064898 %wide.load = load <16 x i32 >, ptr %arr , align 4
@@ -4917,12 +4909,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
49174909; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
49184910; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
49194911; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
4920- ; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4921- ; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4912+ ; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm2
49224913; X64-KNL-NEXT: kmovw %k1, %k2
49234914; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
4924- ; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2,8 ), %zmm0 {%k2}
4925- ; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2,8 ), %zmm1 {%k1}
4915+ ; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2), %zmm0 {%k2}
4916+ ; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2), %zmm1 {%k1}
49264917; X64-KNL-NEXT: retq
49274918;
49284919; X86-KNL-LABEL: test_gather_16f32_mask_index_pair:
@@ -4932,25 +4923,23 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
49324923; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
49334924; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
49344925; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
4935- ; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4936- ; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4926+ ; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm2
49374927; X86-KNL-NEXT: kmovw %k1, %k2
49384928; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
4939- ; X86-KNL-NEXT: vgatherdps (%eax,%zmm2,8 ), %zmm0 {%k2}
4940- ; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2,8 ), %zmm1 {%k1}
4929+ ; X86-KNL-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
4930+ ; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
49414931; X86-KNL-NEXT: retl
49424932;
49434933; X64-SKX-LABEL: test_gather_16f32_mask_index_pair:
49444934; X64-SKX: # %bb.0:
49454935; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
49464936; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
49474937; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4948- ; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4949- ; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4938+ ; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm2
49504939; X64-SKX-NEXT: kmovw %k1, %k2
49514940; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
4952- ; X64-SKX-NEXT: vgatherdps (%rdi,%zmm2,8 ), %zmm0 {%k2}
4953- ; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm2,8 ), %zmm1 {%k1}
4941+ ; X64-SKX-NEXT: vgatherdps (%rdi,%zmm2), %zmm0 {%k2}
4942+ ; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm2), %zmm1 {%k1}
49544943; X64-SKX-NEXT: retq
49554944;
49564945; X86-SKX-LABEL: test_gather_16f32_mask_index_pair:
@@ -4960,12 +4949,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
49604949; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
49614950; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
49624951; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
4963- ; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4964- ; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4952+ ; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm2
49654953; X86-SKX-NEXT: kmovw %k1, %k2
49664954; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
4967- ; X86-SKX-NEXT: vgatherdps (%eax,%zmm2,8 ), %zmm0 {%k2}
4968- ; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2,8 ), %zmm1 {%k1}
4955+ ; X86-SKX-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
4956+ ; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
49694957; X86-SKX-NEXT: retl
49704958 %wide.load = load <16 x i32 >, ptr %arr , align 4
49714959 %and = and <16 x i32 > %wide.load , <i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 , i32 536870911 >
0 commit comments