Skip to content

Commit bfbbe9f

Browse files
Rohit Aggarwal
authored and committed
Update the masked_gather_scatter.ll
1 parent 8ce9360 commit bfbbe9f

File tree

1 file changed

+76
-126
lines changed

1 file changed

+76
-126
lines changed

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Lines changed: 76 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,23 @@ define <16 x float> @test14(ptr %base, i32 %ind, <16 x ptr> %vec) {
639639
; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
640640
; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
641641
; SKX_32-NEXT: retl
642-
642+
; X64-LABEL: test14:
643+
; X64: # %bb.0:
644+
; X64-NEXT: vmovq %xmm0, %rax
645+
; X64-NEXT: vpbroadcastd %esi, %zmm1
646+
; X64-NEXT: kxnorw %k0, %k0, %k1
647+
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0
648+
; X64-NEXT: vgatherdps (%rax,%zmm1,4), %zmm0 {%k1}
649+
; X64-NEXT: retq
650+
;
651+
; X86-LABEL: test14:
652+
; X86: # %bb.0:
653+
; X86-NEXT: vmovd %xmm0, %eax
654+
; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1
655+
; X86-NEXT: kxnorw %k0, %k0, %k1
656+
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0
657+
; X86-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
658+
; X86-NEXT: retl
643659
%broadcast.splatinsert = insertelement <16 x ptr> %vec, ptr %base, i32 1
644660
%broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
645661

@@ -4826,16 +4842,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48264842
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48274843
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
48284844
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4829-
; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4830-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4831-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4832-
; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm0
4833-
; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
4834-
; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4835-
; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
4836-
; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4837-
; X64-KNL-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4838-
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4845+
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4846+
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4847+
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
48394848
; X64-KNL-NEXT: retq
48404849
;
48414850
; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4845,8 +4854,10 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48454854
; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48464855
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
48474856
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
4848-
; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm0
4849-
; X86-KNL-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
4857+
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4858+
; X86-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
4859+
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4860+
; X86-KNL-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
48504861
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
48514862
; X86-KNL-NEXT: retl
48524863
;
@@ -4857,16 +4868,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48574868
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
48584869
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
48594870
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4860-
; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4861-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4862-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4863-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm0
4864-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
4865-
; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4866-
; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
4867-
; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4868-
; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4869-
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4871+
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4872+
; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4873+
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
48704874
; X64-SKX-SMALL-NEXT: retq
48714875
;
48724876
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4877,16 +4881,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48774881
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
48784882
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
48794883
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
4880-
; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4881-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4882-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4883-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm0
4884-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
4885-
; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4886-
; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
4887-
; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4888-
; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4889-
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4884+
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4885+
; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4886+
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
48904887
; X64-SKX-LARGE-NEXT: retq
48914888
;
48924889
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4896,8 +4893,10 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48964893
; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
48974894
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
48984895
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
4899-
; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm0
4900-
; X86-SKX-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
4896+
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4897+
; X86-SKX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
4898+
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4899+
; X86-SKX-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
49014900
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
49024901
; X86-SKX-NEXT: retl
49034902
%wide.load = load <16 x i32>, ptr %arr, align 4
@@ -4916,16 +4915,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49164915
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
49174916
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
49184917
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4919-
; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4920-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4921-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4922-
; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm0
4923-
; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
4924-
; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4925-
; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
4926-
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4927-
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4928-
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4918+
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4919+
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4920+
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
49294921
; X64-KNL-NEXT: retq
49304922
;
49314923
; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4935,8 +4927,10 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49354927
; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
49364928
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
49374929
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
4938-
; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm0
4939-
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
4930+
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4931+
; X86-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
4932+
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4933+
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
49404934
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
49414935
; X86-KNL-NEXT: retl
49424936
;
@@ -4947,16 +4941,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49474941
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
49484942
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
49494943
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4950-
; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4951-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4952-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4953-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm0
4954-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
4955-
; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4956-
; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
4957-
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4958-
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4959-
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4944+
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4945+
; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4946+
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
49604947
; X64-SKX-SMALL-NEXT: retq
49614948
;
49624949
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4967,16 +4954,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49674954
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
49684955
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
49694956
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
4970-
; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4971-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4972-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4973-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm0
4974-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
4975-
; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4976-
; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
4977-
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4978-
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4979-
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4957+
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4958+
; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4959+
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
49804960
; X64-SKX-LARGE-NEXT: retq
49814961
;
49824962
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4986,8 +4966,10 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49864966
; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
49874967
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
49884968
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
4989-
; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm0
4990-
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
4969+
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4970+
; X86-SKX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
4971+
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4972+
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
49914973
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
49924974
; X86-SKX-NEXT: retl
49934975
%wide.load = load <16 x i32>, ptr %arr, align 4
@@ -5006,23 +4988,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
50064988
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
50074989
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
50084990
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
5009-
; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5010-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
5011-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
5012-
; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm3
5013-
; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
5014-
; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
5015-
; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
5016-
; X64-KNL-NEXT: kmovw %k2, %k3
5017-
; X64-KNL-NEXT: vmovaps %ymm4, %ymm0
5018-
; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5019-
; X64-KNL-NEXT: vmovaps %ymm1, %ymm5
5020-
; X64-KNL-NEXT: kmovw %k1, %k3
5021-
; X64-KNL-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5022-
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5023-
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5024-
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5025-
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
4991+
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4992+
; X64-KNL-NEXT: kmovw %k1, %k2
4993+
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
4994+
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4995+
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50264996
; X64-KNL-NEXT: retq
50274997
;
50284998
; X86-KNL-LABEL: test_gather_16f32_mask_index_pair:
@@ -5032,11 +5002,13 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
50325002
; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
50335003
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
50345004
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
5035-
; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm2
5005+
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
5006+
; X86-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
5007+
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
50365008
; X86-KNL-NEXT: kmovw %k1, %k2
50375009
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
5038-
; X86-KNL-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
5039-
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
5010+
; X86-KNL-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
5011+
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
50405012
; X86-KNL-NEXT: retl
50415013
;
50425014
; X64-SKX-SMALL-LABEL: test_gather_16f32_mask_index_pair:
@@ -5046,23 +5018,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
50465018
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
50475019
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
50485020
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
5049-
; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5050-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
5051-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
5052-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm3
5053-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
5054-
; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
5055-
; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
5056-
; X64-SKX-SMALL-NEXT: kmovw %k2, %k3
5057-
; X64-SKX-SMALL-NEXT: vmovaps %ymm4, %ymm0
5058-
; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5059-
; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm5
5060-
; X64-SKX-SMALL-NEXT: kmovw %k1, %k3
5061-
; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5062-
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5063-
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5064-
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5065-
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
5021+
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
5022+
; X64-SKX-SMALL-NEXT: kmovw %k1, %k2
5023+
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
5024+
; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
5025+
; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50665026
; X64-SKX-SMALL-NEXT: retq
50675027
;
50685028
; X64-SKX-LARGE-LABEL: test_gather_16f32_mask_index_pair:
@@ -5073,23 +5033,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
50735033
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
50745034
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
50755035
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
5076-
; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5077-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
5078-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
5079-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm3
5080-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
5081-
; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm4
5082-
; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
5083-
; X64-SKX-LARGE-NEXT: vmovaps %ymm4, %ymm0
5084-
; X64-SKX-LARGE-NEXT: kmovw %k2, %k3
5085-
; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5086-
; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm5
5087-
; X64-SKX-LARGE-NEXT: kmovw %k1, %k3
5088-
; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5089-
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5090-
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5091-
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5092-
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
5036+
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm2
5037+
; X64-SKX-LARGE-NEXT: kmovw %k1, %k2
5038+
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
5039+
; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
5040+
; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50935041
; X64-SKX-LARGE-NEXT: retq
50945042
;
50955043
; X86-SKX-LABEL: test_gather_16f32_mask_index_pair:
@@ -5099,11 +5047,13 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
50995047
; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
51005048
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
51015049
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
5102-
; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm2
5050+
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
5051+
; X86-SKX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
5052+
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
51035053
; X86-SKX-NEXT: kmovw %k1, %k2
51045054
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
5105-
; X86-SKX-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
5106-
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
5055+
; X86-SKX-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
5056+
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
51075057
; X86-SKX-NEXT: retl
51085058
%wide.load = load <16 x i32>, ptr %arr, align 4
51095059
%and = and <16 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>

0 commit comments

Comments (0)