@@ -3766,23 +3766,23 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask2(<8 x double>
3766
3766
define <4 x double > @test_8xdouble_to_4xdouble_perm_mask3 (<8 x double > %vec ) {
3767
3767
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask3:
3768
3768
; CHECK: # %bb.0:
3769
- ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
3770
- ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,4 ]
3771
- ; CHECK-NEXT: vpermi2pd %ymm2 , %ymm0 , %ymm1
3772
- ; CHECK-NEXT: vmovapd %ymm1, % ymm0
3769
+ ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,2,1,4,0,2,1,4]
3770
+ ; CHECK-NEXT: # zmm1 = mem [0,1,2,3,0,1,2,3 ]
3771
+ ; CHECK-NEXT: vpermq %zmm0 , %zmm1 , %zmm0
3772
+ ; CHECK-NEXT: # kill: def $ ymm0 killed $ymm0 killed $zmm0
3773
3773
; CHECK-NEXT: retq
3774
3774
%res = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 0 , i32 2 , i32 1 , i32 4 >
3775
3775
ret <4 x double > %res
3776
3776
}
3777
3777
define <4 x double > @test_masked_8xdouble_to_4xdouble_perm_mask3 (<8 x double > %vec , <4 x double > %vec2 , <4 x double > %mask ) {
3778
3778
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask3:
3779
3779
; CHECK: # %bb.0:
3780
- ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
3781
- ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,4 ]
3782
- ; CHECK-NEXT: vpermi2pd %ymm3 , %ymm0 , %ymm4
3783
- ; CHECK-NEXT: vxorpd %xmm0 , %xmm0 , %xmm0
3784
- ; CHECK-NEXT: vcmpeqpd %ymm0 , %ymm2, %k1
3785
- ; CHECK-NEXT: vblendmpd %ymm4 , %ymm1, %ymm0 {%k1}
3780
+ ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [0,2,1,4,0,2,1,4]
3781
+ ; CHECK-NEXT: # zmm3 = mem [0,1,2,3,0,1,2,3 ]
3782
+ ; CHECK-NEXT: vpermq %zmm0 , %zmm3 , %zmm0
3783
+ ; CHECK-NEXT: vpxor %xmm3 , %xmm3 , %xmm3
3784
+ ; CHECK-NEXT: vcmpeqpd %ymm3 , %ymm2, %k1
3785
+ ; CHECK-NEXT: vblendmpd %ymm0 , %ymm1, %ymm0 {%k1}
3786
3786
; CHECK-NEXT: retq
3787
3787
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 0 , i32 2 , i32 1 , i32 4 >
3788
3788
%cmp = fcmp oeq <4 x double > %mask , zeroinitializer
@@ -3793,12 +3793,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v
3793
3793
define <4 x double > @test_masked_z_8xdouble_to_4xdouble_perm_mask3 (<8 x double > %vec , <4 x double > %mask ) {
3794
3794
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask3:
3795
3795
; CHECK: # %bb.0:
3796
- ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
3797
3796
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,4]
3798
- ; CHECK-NEXT: vxorpd %xmm4 , %xmm4 , %xmm4
3799
- ; CHECK-NEXT: vcmpeqpd %ymm4 , %ymm1, %k1
3800
- ; CHECK-NEXT: vpermi2pd %ymm3 , %ymm0 , %ymm2 {%k1} {z}
3801
- ; CHECK-NEXT: vmovapd %ymm2, % ymm0
3797
+ ; CHECK-NEXT: vxorpd %xmm3 , %xmm3 , %xmm3
3798
+ ; CHECK-NEXT: vcmpeqpd %ymm3 , %ymm1, %k1
3799
+ ; CHECK-NEXT: vpermpd %zmm0 , %zmm2 , %zmm0 {%k1} {z}
3800
+ ; CHECK-NEXT: # kill: def $ ymm0 killed $ymm0 killed $zmm0
3802
3801
; CHECK-NEXT: retq
3803
3802
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 0 , i32 2 , i32 1 , i32 4 >
3804
3803
%cmp = fcmp oeq <4 x double > %mask , zeroinitializer
@@ -3869,9 +3868,9 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask5(<8 x double>
3869
3868
define <4 x double > @test_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec ) {
3870
3869
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask6:
3871
3870
; CHECK: # %bb.0:
3872
- ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [5,0 ,7,0 ,5,0 ,7,0 ]
3871
+ ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [5,8 ,7,8 ,5,8 ,7,8 ]
3873
3872
; CHECK-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
3874
- ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0
3873
+ ; CHECK-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0
3875
3874
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
3876
3875
; CHECK-NEXT: retq
3877
3876
%res = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
@@ -3880,12 +3879,12 @@ define <4 x double> @test_8xdouble_to_4xdouble_perm_mask6(<8 x double> %vec) {
3880
3879
define <4 x double > @test_masked_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec , <4 x double > %vec2 , <4 x double > %mask ) {
3881
3880
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask6:
3882
3881
; CHECK: # %bb.0:
3883
- ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [5,0 ,7,0 ,5,0 ,7,0 ]
3882
+ ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [5,8 ,7,8 ,5,8 ,7,8 ]
3884
3883
; CHECK-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3]
3885
- ; CHECK-NEXT: vpermq %zmm0, %zmm3 , %zmm0
3886
- ; CHECK-NEXT: vpxor %xmm3 , %xmm3 , %xmm3
3887
- ; CHECK-NEXT: vcmpeqpd %ymm3 , %ymm2, %k1
3888
- ; CHECK-NEXT: vblendmpd %ymm0 , %ymm1, %ymm0 {%k1}
3884
+ ; CHECK-NEXT: vpermi2pd %zmm0, %zmm0 , %zmm3
3885
+ ; CHECK-NEXT: vxorpd %xmm0 , %xmm0 , %xmm0
3886
+ ; CHECK-NEXT: vcmpeqpd %ymm0 , %ymm2, %k1
3887
+ ; CHECK-NEXT: vblendmpd %ymm3 , %ymm1, %ymm0 {%k1}
3889
3888
; CHECK-NEXT: retq
3890
3889
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
3891
3890
%cmp = fcmp oeq <4 x double > %mask , zeroinitializer
@@ -3896,10 +3895,10 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask6(<8 x double> %v
3896
3895
define <4 x double > @test_masked_z_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec , <4 x double > %mask ) {
3897
3896
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask6:
3898
3897
; CHECK: # %bb.0:
3899
- ; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [5,0 ,7,0 ]
3898
+ ; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [5,8 ,7,8 ]
3900
3899
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
3901
3900
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
3902
- ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
3901
+ ; CHECK-NEXT: vpermt2pd %zmm0, %zmm2, %zmm0 {%k1} {z}
3903
3902
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
3904
3903
; CHECK-NEXT: retq
3905
3904
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
@@ -3983,9 +3982,8 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask0(<8 x double>
3983
3982
define <2 x double > @test_masked_8xdouble_to_2xdouble_perm_mask1 (<8 x double > %vec , <2 x double > %vec2 , <2 x double > %mask ) {
3984
3983
; CHECK-LABEL: test_masked_8xdouble_to_2xdouble_perm_mask1:
3985
3984
; CHECK: # %bb.0:
3986
- ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
3987
- ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
3988
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
3985
+ ; CHECK-NEXT: vmovapd {{.*#+}} xmm3 = [3,7]
3986
+ ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm0
3989
3987
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
3990
3988
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
3991
3989
; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
@@ -4000,12 +3998,11 @@ define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask1(<8 x double> %v
4000
3998
define <2 x double > @test_masked_z_8xdouble_to_2xdouble_perm_mask1 (<8 x double > %vec , <2 x double > %mask ) {
4001
3999
; CHECK-LABEL: test_masked_z_8xdouble_to_2xdouble_perm_mask1:
4002
4000
; CHECK: # %bb.0:
4003
- ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
4004
- ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
4005
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
4006
- ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4007
- ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
4008
- ; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
4001
+ ; CHECK-NEXT: vmovapd {{.*#+}} xmm2 = [3,7]
4002
+ ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4003
+ ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1
4004
+ ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
4005
+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
4009
4006
; CHECK-NEXT: vzeroupper
4010
4007
; CHECK-NEXT: retq
4011
4008
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <2 x i32 > <i32 3 , i32 7 >
@@ -4062,8 +4059,8 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(<8 x double
4062
4059
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask1:
4063
4060
; CHECK: # %bb.0:
4064
4061
; CHECK-NEXT: vmovapd (%rdi), %ymm2
4065
- ; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [3,4,2,4 ]
4066
- ; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm2, %ymm3
4062
+ ; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [3,4,2,6 ]
4063
+ ; CHECK-NEXT: vpermi2pd 32(%rdi){1to4} , %ymm2, %ymm3
4067
4064
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4068
4065
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
4069
4066
; CHECK-NEXT: vmovapd %ymm3, %ymm0 {%k1}
@@ -4079,10 +4076,10 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1(<8 x doub
4079
4076
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1:
4080
4077
; CHECK: # %bb.0:
4081
4078
; CHECK-NEXT: vmovapd (%rdi), %ymm2
4082
- ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,4 ]
4079
+ ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,6 ]
4083
4080
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4084
4081
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm0, %k1
4085
- ; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm2, %ymm1 {%k1} {z}
4082
+ ; CHECK-NEXT: vpermi2pd 32(%rdi){1to4} , %ymm2, %ymm1 {%k1} {z}
4086
4083
; CHECK-NEXT: vmovapd %ymm1, %ymm0
4087
4084
; CHECK-NEXT: retq
4088
4085
%vec = load <8 x double >, <8 x double >* %vp
@@ -4242,10 +4239,9 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask5(<8 x doub
4242
4239
define <4 x double > @test_8xdouble_to_4xdouble_perm_mem_mask6 (<8 x double >* %vp ) {
4243
4240
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mem_mask6:
4244
4241
; CHECK: # %bb.0:
4245
- ; CHECK-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm1
4246
- ; CHECK-NEXT: vmovapd 32(%rdi), %ymm2
4247
- ; CHECK-NEXT: vmovapd {{.*#+}} ymm0 = [0,2,6,1]
4248
- ; CHECK-NEXT: vpermi2pd %ymm1, %ymm2, %ymm0
4242
+ ; CHECK-NEXT: vmovapd 32(%rdi), %ymm1
4243
+ ; CHECK-NEXT: vmovapd {{.*#+}} ymm0 = [0,2,4,1]
4244
+ ; CHECK-NEXT: vpermi2pd (%rdi), %ymm1, %ymm0
4249
4245
; CHECK-NEXT: retq
4250
4246
%vec = load <8 x double >, <8 x double >* %vp
4251
4247
%res = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 4 , i32 6 , i32 0 , i32 5 >
@@ -4254,13 +4250,12 @@ define <4 x double> @test_8xdouble_to_4xdouble_perm_mem_mask6(<8 x double>* %vp)
4254
4250
define <4 x double > @test_masked_8xdouble_to_4xdouble_perm_mem_mask6 (<8 x double >* %vp , <4 x double > %vec2 , <4 x double > %mask ) {
4255
4251
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask6:
4256
4252
; CHECK: # %bb.0:
4257
- ; CHECK-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm2
4258
- ; CHECK-NEXT: vmovapd 32(%rdi), %ymm3
4259
- ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,6,1]
4260
- ; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm4
4253
+ ; CHECK-NEXT: vmovapd 32(%rdi), %ymm2
4254
+ ; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [0,2,4,1]
4255
+ ; CHECK-NEXT: vpermi2pd (%rdi), %ymm2, %ymm3
4261
4256
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4262
4257
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
4263
- ; CHECK-NEXT: vmovapd %ymm4 , %ymm0 {%k1}
4258
+ ; CHECK-NEXT: vmovapd %ymm3 , %ymm0 {%k1}
4264
4259
; CHECK-NEXT: retq
4265
4260
%vec = load <8 x double >, <8 x double >* %vp
4266
4261
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 4 , i32 6 , i32 0 , i32 5 >
@@ -4272,12 +4267,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask6(<8 x double
4272
4267
define <4 x double > @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask6 (<8 x double >* %vp , <4 x double > %mask ) {
4273
4268
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask6:
4274
4269
; CHECK: # %bb.0:
4275
- ; CHECK-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm2
4276
- ; CHECK-NEXT: vmovapd 32(%rdi), %ymm3
4277
- ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,6,1]
4278
- ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
4279
- ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
4280
- ; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm1 {%k1} {z}
4270
+ ; CHECK-NEXT: vmovapd 32(%rdi), %ymm2
4271
+ ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,4,1]
4272
+ ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4273
+ ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm0, %k1
4274
+ ; CHECK-NEXT: vpermi2pd (%rdi), %ymm2, %ymm1 {%k1} {z}
4281
4275
; CHECK-NEXT: vmovapd %ymm1, %ymm0
4282
4276
; CHECK-NEXT: retq
4283
4277
%vec = load <8 x double >, <8 x double >* %vp
0 commit comments