Skip to content

Commit 66e3906

Browse files
committed
[X86][AVX] Use lowerShuffleAsLanePermuteAndSHUFP to lower binary v4f64 shuffles.
Only perform this if we are shuffling lower and upper lane elements across the lanes (otherwise splitting to lower xmm shuffles would be better). This would be a regression if we shuffled build_vectors, because getVectorShuffle canonicalizes them to a 'blend of splat'; for now I've set this not to shuffle build_vector nodes at all to avoid this.
1 parent b375f28 commit 66e3906

File tree

6 files changed

+103
-174
lines changed

6 files changed

+103
-174
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15879,6 +15879,18 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1587915879
Zeroable, Subtarget, DAG))
1588015880
return Op;
1588115881

15882+
// If we have lane crossing shuffles AND they don't all come from the lower
15883+
// lane elements, lower to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
15884+
// TODO: Handle BUILD_VECTOR sources which getVectorShuffle currently
15885+
// canonicalize to a blend of splat which isn't necessary for this combine.
15886+
if (is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask) &&
15887+
!all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) &&
15888+
(V1.getOpcode() != ISD::BUILD_VECTOR) &&
15889+
(V2.getOpcode() != ISD::BUILD_VECTOR))
15890+
if (SDValue Op = lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2,
15891+
Mask, DAG))
15892+
return Op;
15893+
1588215894
// If we have one input in place, then we can permute the other input and
1588315895
// blend the result.
1588415896
if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))

llvm/test/CodeGen/X86/avx-unpack.ll

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -73,11 +73,9 @@ define <8 x float> @unpackhips_not(<8 x float> %src1, <8 x float> %src2) nounwin
7373
define <4 x double> @unpackhipd_not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
7474
; CHECK-LABEL: unpackhipd_not:
7575
; CHECK: # %bb.0:
76-
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
77-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
78-
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
79-
; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
80-
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
76+
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
77+
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
78+
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
8179
; CHECK-NEXT: retq
8280
%shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
8381
ret <4 x double> %shuffle.i

llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll

Lines changed: 46 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -3766,23 +3766,23 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask2(<8 x double>
37663766
define <4 x double> @test_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec) {
37673767
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask3:
37683768
; CHECK: # %bb.0:
3769-
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
3770-
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,4]
3771-
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
3772-
; CHECK-NEXT: vmovapd %ymm1, %ymm0
3769+
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,2,1,4,0,2,1,4]
3770+
; CHECK-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
3771+
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0
3772+
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
37733773
; CHECK-NEXT: retq
37743774
%res = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 4>
37753775
ret <4 x double> %res
37763776
}
37773777
define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
37783778
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask3:
37793779
; CHECK: # %bb.0:
3780-
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
3781-
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,4]
3782-
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
3783-
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
3784-
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
3785-
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
3780+
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [0,2,1,4,0,2,1,4]
3781+
; CHECK-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3]
3782+
; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm0
3783+
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
3784+
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
3785+
; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
37863786
; CHECK-NEXT: retq
37873787
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 4>
37883788
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer
@@ -3793,12 +3793,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v
37933793
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec, <4 x double> %mask) {
37943794
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask3:
37953795
; CHECK: # %bb.0:
3796-
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
37973796
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,4]
3798-
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
3799-
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
3800-
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
3801-
; CHECK-NEXT: vmovapd %ymm2, %ymm0
3797+
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
3798+
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
3799+
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
3800+
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
38023801
; CHECK-NEXT: retq
38033802
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 4>
38043803
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer
@@ -3869,9 +3868,9 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask5(<8 x double>
38693868
define <4 x double> @test_8xdouble_to_4xdouble_perm_mask6(<8 x double> %vec) {
38703869
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask6:
38713870
; CHECK: # %bb.0:
3872-
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [5,0,7,0,5,0,7,0]
3871+
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [5,8,7,8,5,8,7,8]
38733872
; CHECK-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
3874-
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0
3873+
; CHECK-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0
38753874
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
38763875
; CHECK-NEXT: retq
38773876
%res = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 5, i32 0, i32 7, i32 0>
@@ -3880,12 +3879,12 @@ define <4 x double> @test_8xdouble_to_4xdouble_perm_mask6(<8 x double> %vec) {
38803879
define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask6(<8 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
38813880
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask6:
38823881
; CHECK: # %bb.0:
3883-
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [5,0,7,0,5,0,7,0]
3882+
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [5,8,7,8,5,8,7,8]
38843883
; CHECK-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3]
3885-
; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm0
3886-
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
3887-
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
3888-
; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
3884+
; CHECK-NEXT: vpermi2pd %zmm0, %zmm0, %zmm3
3885+
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
3886+
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
3887+
; CHECK-NEXT: vblendmpd %ymm3, %ymm1, %ymm0 {%k1}
38893888
; CHECK-NEXT: retq
38903889
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 5, i32 0, i32 7, i32 0>
38913890
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer
@@ -3896,10 +3895,10 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask6(<8 x double> %v
38963895
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask6(<8 x double> %vec, <4 x double> %mask) {
38973896
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask6:
38983897
; CHECK: # %bb.0:
3899-
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [5,0,7,0]
3898+
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [5,8,7,8]
39003899
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
39013900
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
3902-
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
3901+
; CHECK-NEXT: vpermt2pd %zmm0, %zmm2, %zmm0 {%k1} {z}
39033902
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
39043903
; CHECK-NEXT: retq
39053904
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 5, i32 0, i32 7, i32 0>
@@ -3983,9 +3982,8 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask0(<8 x double>
39833982
define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask1(<8 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
39843983
; CHECK-LABEL: test_masked_8xdouble_to_2xdouble_perm_mask1:
39853984
; CHECK: # %bb.0:
3986-
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
3987-
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
3988-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
3985+
; CHECK-NEXT: vmovapd {{.*#+}} xmm3 = [3,7]
3986+
; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm0
39893987
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
39903988
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
39913989
; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
@@ -4000,12 +3998,11 @@ define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask1(<8 x double> %v
40003998
define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask1(<8 x double> %vec, <2 x double> %mask) {
40013999
; CHECK-LABEL: test_masked_z_8xdouble_to_2xdouble_perm_mask1:
40024000
; CHECK: # %bb.0:
4003-
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
4004-
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
4005-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
4006-
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4007-
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
4008-
; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
4001+
; CHECK-NEXT: vmovapd {{.*#+}} xmm2 = [3,7]
4002+
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4003+
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1
4004+
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
4005+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
40094006
; CHECK-NEXT: vzeroupper
40104007
; CHECK-NEXT: retq
40114008
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <2 x i32> <i32 3, i32 7>
@@ -4062,8 +4059,8 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(<8 x double
40624059
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask1:
40634060
; CHECK: # %bb.0:
40644061
; CHECK-NEXT: vmovapd (%rdi), %ymm2
4065-
; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [3,4,2,4]
4066-
; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm2, %ymm3
4062+
; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [3,4,2,6]
4063+
; CHECK-NEXT: vpermi2pd 32(%rdi){1to4}, %ymm2, %ymm3
40674064
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
40684065
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
40694066
; CHECK-NEXT: vmovapd %ymm3, %ymm0 {%k1}
@@ -4079,10 +4076,10 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1(<8 x doub
40794076
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1:
40804077
; CHECK: # %bb.0:
40814078
; CHECK-NEXT: vmovapd (%rdi), %ymm2
4082-
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,4]
4079+
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,6]
40834080
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
40844081
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm0, %k1
4085-
; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm2, %ymm1 {%k1} {z}
4082+
; CHECK-NEXT: vpermi2pd 32(%rdi){1to4}, %ymm2, %ymm1 {%k1} {z}
40864083
; CHECK-NEXT: vmovapd %ymm1, %ymm0
40874084
; CHECK-NEXT: retq
40884085
%vec = load <8 x double>, <8 x double>* %vp
@@ -4242,10 +4239,9 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask5(<8 x doub
42424239
define <4 x double> @test_8xdouble_to_4xdouble_perm_mem_mask6(<8 x double>* %vp) {
42434240
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mem_mask6:
42444241
; CHECK: # %bb.0:
4245-
; CHECK-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm1
4246-
; CHECK-NEXT: vmovapd 32(%rdi), %ymm2
4247-
; CHECK-NEXT: vmovapd {{.*#+}} ymm0 = [0,2,6,1]
4248-
; CHECK-NEXT: vpermi2pd %ymm1, %ymm2, %ymm0
4242+
; CHECK-NEXT: vmovapd 32(%rdi), %ymm1
4243+
; CHECK-NEXT: vmovapd {{.*#+}} ymm0 = [0,2,4,1]
4244+
; CHECK-NEXT: vpermi2pd (%rdi), %ymm1, %ymm0
42494245
; CHECK-NEXT: retq
42504246
%vec = load <8 x double>, <8 x double>* %vp
42514247
%res = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
@@ -4254,13 +4250,12 @@ define <4 x double> @test_8xdouble_to_4xdouble_perm_mem_mask6(<8 x double>* %vp)
42544250
define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask6(<8 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
42554251
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask6:
42564252
; CHECK: # %bb.0:
4257-
; CHECK-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm2
4258-
; CHECK-NEXT: vmovapd 32(%rdi), %ymm3
4259-
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,6,1]
4260-
; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm4
4253+
; CHECK-NEXT: vmovapd 32(%rdi), %ymm2
4254+
; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [0,2,4,1]
4255+
; CHECK-NEXT: vpermi2pd (%rdi), %ymm2, %ymm3
42614256
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
42624257
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
4263-
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
4258+
; CHECK-NEXT: vmovapd %ymm3, %ymm0 {%k1}
42644259
; CHECK-NEXT: retq
42654260
%vec = load <8 x double>, <8 x double>* %vp
42664261
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
@@ -4272,12 +4267,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask6(<8 x double
42724267
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask6(<8 x double>* %vp, <4 x double> %mask) {
42734268
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask6:
42744269
; CHECK: # %bb.0:
4275-
; CHECK-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm2
4276-
; CHECK-NEXT: vmovapd 32(%rdi), %ymm3
4277-
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,6,1]
4278-
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
4279-
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
4280-
; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm1 {%k1} {z}
4270+
; CHECK-NEXT: vmovapd 32(%rdi), %ymm2
4271+
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,4,1]
4272+
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4273+
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm0, %k1
4274+
; CHECK-NEXT: vpermi2pd (%rdi), %ymm2, %ymm1 {%k1} {z}
42814275
; CHECK-NEXT: vmovapd %ymm1, %ymm0
42824276
; CHECK-NEXT: retq
42834277
%vec = load <8 x double>, <8 x double>* %vp

llvm/test/CodeGen/X86/subvector-broadcast.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1462,13 +1462,13 @@ define <8 x float> @broadcast_v8f32_v2f32_u1uu0uEu(<2 x float>* %vp, <8 x float>
14621462
; X32: # %bb.0:
14631463
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
14641464
; X32-NEXT: vbroadcastsd (%eax), %ymm1
1465-
; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
1465+
; X32-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[3]
14661466
; X32-NEXT: retl
14671467
;
14681468
; X64-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
14691469
; X64: # %bb.0:
14701470
; X64-NEXT: vbroadcastsd (%rdi), %ymm1
1471-
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
1471+
; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[3]
14721472
; X64-NEXT: retq
14731473
%vec = load <2 x float>, <2 x float>* %vp
14741474
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 0, i32 2, i32 3, i32 undef>

0 commit comments

Comments (0)