@@ -2846,12 +2846,12 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask0(<8 x float> %vec
28462846define <4 x float > @test_masked_8xfloat_to_4xfloat_perm_mask1 (<8 x float > %vec , <4 x float > %vec2 , <4 x float > %mask ) {
28472847; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask1:
28482848; CHECK: # %bb.0:
2849- ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
2850- ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,3,5,0]
2851- ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2852- ; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1
2853- ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
2854- ; CHECK-NEXT: vmovaps %xmm1, %xmm0
2849+ ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,3,5,3]
2850+ ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3
2851+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
2852+ ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2853+ ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
2854+ ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
28552855; CHECK-NEXT: vzeroupper
28562856; CHECK-NEXT: retq
28572857 %shuf = shufflevector <8 x float > %vec , <8 x float > undef , <4 x i32 > <i32 1 , i32 3 , i32 5 , i32 0 >
@@ -2863,11 +2863,12 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec,
28632863define <4 x float > @test_masked_z_8xfloat_to_4xfloat_perm_mask1 (<8 x float > %vec , <4 x float > %mask ) {
28642864; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask1:
28652865; CHECK: # %bb.0:
2866- ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,3,5,0]
2867- ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2868- ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
2869- ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
2870- ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2866+ ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,3,5,3]
2867+ ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2
2868+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
2869+ ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2870+ ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
2871+ ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
28712872; CHECK-NEXT: vzeroupper
28722873; CHECK-NEXT: retq
28732874 %shuf = shufflevector <8 x float > %vec , <8 x float > undef , <4 x i32 > <i32 1 , i32 3 , i32 5 , i32 0 >
@@ -2878,12 +2879,12 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec
28782879define <4 x float > @test_masked_8xfloat_to_4xfloat_perm_mask2 (<8 x float > %vec , <4 x float > %vec2 , <4 x float > %mask ) {
28792880; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask2:
28802881; CHECK: # %bb.0:
2881- ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
2882- ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm3 = [3,2,7,0]
2883- ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2884- ; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1
2885- ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
2886- ; CHECK-NEXT: vmovaps %xmm1, %xmm0
2882+ ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm3 = [3,2,7,3]
2883+ ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3
2884+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
2885+ ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2886+ ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
2887+ ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
28872888; CHECK-NEXT: vzeroupper
28882889; CHECK-NEXT: retq
28892890 %shuf = shufflevector <8 x float > %vec , <8 x float > undef , <4 x i32 > <i32 3 , i32 2 , i32 7 , i32 0 >
@@ -2895,11 +2896,12 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask2(<8 x float> %vec,
28952896define <4 x float > @test_masked_z_8xfloat_to_4xfloat_perm_mask2 (<8 x float > %vec , <4 x float > %mask ) {
28962897; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask2:
28972898; CHECK: # %bb.0:
2898- ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm2 = [3,2,7,0]
2899- ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2900- ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
2901- ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
2902- ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2899+ ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm2 = [3,2,7,3]
2900+ ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2
2901+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
2902+ ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2903+ ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
2904+ ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
29032905; CHECK-NEXT: vzeroupper
29042906; CHECK-NEXT: retq
29052907 %shuf = shufflevector <8 x float > %vec , <8 x float > undef , <4 x i32 > <i32 3 , i32 2 , i32 7 , i32 0 >
@@ -3885,10 +3887,12 @@ define <2 x double> @test_masked_z_4xdouble_to_2xdouble_perm_mem_mask0(ptr %vp,
38853887define <2 x double > @test_masked_4xdouble_to_2xdouble_perm_mem_mask1 (ptr %vp , <2 x double > %vec2 , <2 x double > %mask ) {
38863888; CHECK-LABEL: test_masked_4xdouble_to_2xdouble_perm_mem_mask1:
38873889; CHECK: # %bb.0:
3888- ; CHECK-NEXT: vmovapd 16(%rdi), %xmm2
3889- ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
3890- ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1
3891- ; CHECK-NEXT: vunpcklpd (%rdi), %xmm2, %xmm0 {%k1} # xmm0 {%k1} = xmm2[0],mem[0]
3890+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
3891+ ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
3892+ ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
3893+ ; CHECK-NEXT: vpermpd $226, (%rdi), %ymm0 {%k1} # ymm0 {%k1} = mem[2,0,2,3]
3894+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3895+ ; CHECK-NEXT: vzeroupper
38923896; CHECK-NEXT: retq
38933897 %vec = load <4 x double >, ptr %vp
38943898 %shuf = shufflevector <4 x double > %vec , <4 x double > undef , <2 x i32 > <i32 2 , i32 0 >
@@ -3900,10 +3904,11 @@ define <2 x double> @test_masked_4xdouble_to_2xdouble_perm_mem_mask1(ptr %vp, <2
39003904define <2 x double > @test_masked_z_4xdouble_to_2xdouble_perm_mem_mask1 (ptr %vp , <2 x double > %mask ) {
39013905; CHECK-LABEL: test_masked_z_4xdouble_to_2xdouble_perm_mem_mask1:
39023906; CHECK: # %bb.0:
3903- ; CHECK-NEXT: vmovapd 16(%rdi), %xmm1
3904- ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
3905- ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm0, %k1
3906- ; CHECK-NEXT: vunpcklpd (%rdi), %xmm1, %xmm0 {%k1} {z} # xmm0 {%k1} {z} = xmm1[0],mem[0]
3907+ ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
3908+ ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
3909+ ; CHECK-NEXT: vpermpd $226, (%rdi), %ymm0 {%k1} {z} # ymm0 {%k1} {z} = mem[2,0,2,3]
3910+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3911+ ; CHECK-NEXT: vzeroupper
39073912; CHECK-NEXT: retq
39083913 %vec = load <4 x double >, ptr %vp
39093914 %shuf = shufflevector <4 x double > %vec , <4 x double > undef , <2 x i32 > <i32 2 , i32 0 >
@@ -4130,38 +4135,42 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask5(<8 x double>
41304135define <4 x double > @test_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec ) {
41314136; CHECK-FAST-LABEL: test_8xdouble_to_4xdouble_perm_mask6:
41324137; CHECK-FAST: # %bb.0:
4133- ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} ymm1 = [5,0,7,0]
4134- ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm1, %zmm0
4135- ; CHECK-FAST-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
4138+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm1 = [5,0]
4139+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm1, %zmm1
4140+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm2 = [7,0]
4141+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm2, %zmm0
4142+ ; CHECK-FAST-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
41364143; CHECK-FAST-NEXT: retq
41374144;
41384145; CHECK-FAST-PERLANE-LABEL: test_8xdouble_to_4xdouble_perm_mask6:
41394146; CHECK-FAST-PERLANE: # %bb.0:
4140- ; CHECK-FAST-PERLANE-NEXT: vextractf64x4 $1, %zmm0, %ymm1
4141- ; CHECK-FAST-PERLANE-NEXT: vbroadcastsd %xmm0, %ymm0
4142- ; CHECK-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
4147+ ; CHECK-FAST-PERLANE-NEXT: vpmovsxbq {{.*#+}} ymm1 = [5,0,7,0]
4148+ ; CHECK-FAST-PERLANE-NEXT: vpermpd %zmm0, %zmm1, %zmm0
4149+ ; CHECK-FAST-PERLANE-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
41434150; CHECK-FAST-PERLANE-NEXT: retq
41444151 %res = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
41454152 ret <4 x double > %res
41464153}
41474154define <4 x double > @test_masked_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec , <4 x double > %vec2 , <4 x double > %mask ) {
41484155; CHECK-FAST-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask6:
41494156; CHECK-FAST: # %bb.0:
4150- ; CHECK-FAST-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4151- ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} ymm3 = [5,0,7,0]
4152- ; CHECK-FAST-NEXT: vxorpd %xmm4, %xmm4, %xmm4
4153- ; CHECK-FAST-NEXT: vcmpeqpd %ymm4, %ymm2, %k1
4154- ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
4155- ; CHECK-FAST-NEXT: vmovapd %ymm1, %ymm0
4157+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm3 = [5,0]
4158+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm3, %zmm3
4159+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm4 = [7,0]
4160+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm4, %zmm0
4161+ ; CHECK-FAST-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
4162+ ; CHECK-FAST-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4163+ ; CHECK-FAST-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
4164+ ; CHECK-FAST-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
41564165; CHECK-FAST-NEXT: retq
41574166;
41584167; CHECK-FAST-PERLANE-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask6:
41594168; CHECK-FAST-PERLANE: # %bb.0:
4160- ; CHECK-FAST-PERLANE-NEXT: vextractf64x4 $1, %zmm0, %ymm3
4161- ; CHECK-FAST-PERLANE-NEXT: vbroadcastsd %xmm0, %ymm0
4169+ ; CHECK-FAST-PERLANE-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4170+ ; CHECK-FAST-PERLANE-NEXT: vpmovsxbq {{.*#+}} ymm3 = [5,0,7,0]
41624171; CHECK-FAST-PERLANE-NEXT: vxorpd %xmm4, %xmm4, %xmm4
41634172; CHECK-FAST-PERLANE-NEXT: vcmpeqpd %ymm4, %ymm2, %k1
4164- ; CHECK-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm3[1],ymm0[1],ymm3[3],ymm0[3]
4173+ ; CHECK-FAST-PERLANE-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
41654174; CHECK-FAST-PERLANE-NEXT: vmovapd %ymm1, %ymm0
41664175; CHECK-FAST-PERLANE-NEXT: retq
41674176 %shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
@@ -4173,20 +4182,23 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask6(<8 x double> %v
41734182define <4 x double > @test_masked_z_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec , <4 x double > %mask ) {
41744183; CHECK-FAST-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask6:
41754184; CHECK-FAST: # %bb.0:
4176- ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,0,7,0]
4177- ; CHECK-FAST-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4178- ; CHECK-FAST-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
4179- ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
4180- ; CHECK-FAST-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
4185+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm2 = [5,0]
4186+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm2, %zmm2
4187+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm3 = [7,0]
4188+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm3, %zmm0
4189+ ; CHECK-FAST-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
4190+ ; CHECK-FAST-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4191+ ; CHECK-FAST-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
4192+ ; CHECK-FAST-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
41814193; CHECK-FAST-NEXT: retq
41824194;
41834195; CHECK-FAST-PERLANE-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask6:
41844196; CHECK-FAST-PERLANE: # %bb.0:
4185- ; CHECK-FAST-PERLANE-NEXT: vextractf64x4 $1, %zmm0, %ymm2
4186- ; CHECK-FAST-PERLANE-NEXT: vbroadcastsd %xmm0, %ymm0
4197+ ; CHECK-FAST-PERLANE-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,0,7,0]
41874198; CHECK-FAST-PERLANE-NEXT: vxorpd %xmm3, %xmm3, %xmm3
41884199; CHECK-FAST-PERLANE-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
4189- ; CHECK-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm2[1],ymm0[1],ymm2[3],ymm0[3]
4200+ ; CHECK-FAST-PERLANE-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
4201+ ; CHECK-FAST-PERLANE-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
41904202; CHECK-FAST-PERLANE-NEXT: retq
41914203 %shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
41924204 %cmp = fcmp oeq <4 x double > %mask , zeroinitializer
@@ -4493,9 +4505,12 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask2(ptr %vp,
44934505define <4 x double > @test_8xdouble_to_4xdouble_perm_mem_mask3 (ptr %vp ) {
44944506; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mem_mask3:
44954507; CHECK: # %bb.0:
4496- ; CHECK-NEXT: vmovapd (%rdi), %ymm1
4497- ; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,2,1,0]
4498- ; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm1, %ymm0
4508+ ; CHECK-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,2]
4509+ ; CHECK-NEXT: vmovaps (%rdi), %zmm1
4510+ ; CHECK-NEXT: vpermpd %zmm1, %zmm0, %zmm0
4511+ ; CHECK-NEXT: vmovddup 8(%rdi), %xmm2 # xmm2 = mem[0,0]
4512+ ; CHECK-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
4513+ ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
44994514; CHECK-NEXT: retq
45004515 %vec = load <8 x double >, ptr %vp
45014516 %res = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 4 , i32 2 , i32 1 , i32 0 >
@@ -4504,12 +4519,15 @@ define <4 x double> @test_8xdouble_to_4xdouble_perm_mem_mask3(ptr %vp) {
45044519define <4 x double > @test_masked_8xdouble_to_4xdouble_perm_mem_mask3 (ptr %vp , <4 x double > %vec2 , <4 x double > %mask ) {
45054520; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask3:
45064521; CHECK: # %bb.0:
4507- ; CHECK-NEXT: vmovapd (%rdi), %ymm2
4508- ; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm3 = [4,2,1,0]
4509- ; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm2, %ymm3
4510- ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4511- ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
4512- ; CHECK-NEXT: vmovapd %ymm3, %ymm0 {%k1}
4522+ ; CHECK-NEXT: vpmovsxbq {{.*#+}} xmm2 = [4,2]
4523+ ; CHECK-NEXT: vmovapd (%rdi), %zmm3
4524+ ; CHECK-NEXT: vpermpd %zmm3, %zmm2, %zmm2
4525+ ; CHECK-NEXT: vmovddup 8(%rdi), %xmm4 # xmm4 = mem[0,0]
4526+ ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
4527+ ; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
4528+ ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4529+ ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
4530+ ; CHECK-NEXT: vmovapd %ymm2, %ymm0 {%k1}
45134531; CHECK-NEXT: retq
45144532 %vec = load <8 x double >, ptr %vp
45154533 %shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 4 , i32 2 , i32 1 , i32 0 >
@@ -4521,12 +4539,15 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask3(ptr %vp, <4
45214539define <4 x double > @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask3 (ptr %vp , <4 x double > %mask ) {
45224540; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask3:
45234541; CHECK: # %bb.0:
4524- ; CHECK-NEXT: vmovapd (%rdi), %ymm2
4525- ; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm1 = [4,2,1,0]
4526- ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4527- ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm0, %k1
4528- ; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm2, %ymm1 {%k1} {z}
4529- ; CHECK-NEXT: vmovapd %ymm1, %ymm0
4542+ ; CHECK-NEXT: vpmovsxbq {{.*#+}} xmm1 = [4,2]
4543+ ; CHECK-NEXT: vmovapd (%rdi), %zmm2
4544+ ; CHECK-NEXT: vpermpd %zmm2, %zmm1, %zmm1
4545+ ; CHECK-NEXT: vmovddup 8(%rdi), %xmm3 # xmm3 = mem[0,0]
4546+ ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
4547+ ; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
4548+ ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4549+ ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm0, %k1
4550+ ; CHECK-NEXT: vmovapd %ymm1, %ymm0 {%k1} {z}
45304551; CHECK-NEXT: retq
45314552 %vec = load <8 x double >, ptr %vp
45324553 %shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 4 , i32 2 , i32 1 , i32 0 >
0 commit comments