
Commit 6804946

[X86][SSE] Combine 128-bit target shuffles to PACKSS/PACKUS.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316845 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent f317897 commit 6804946

4 files changed, +39 -45 lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 0 deletions
@@ -27698,6 +27698,16 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
 
+  // Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
+  // TODO add support for 256/512-bit types.
+  if ((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) {
+    if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
+                                   Subtarget)) {
+      DstVT = MaskVT;
+      return true;
+    }
+  }
+
   // Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
   if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
       (MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
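
For illustration only (not part of this commit): the equivalence the matcher exploits can be written as LLVM IR along the following lines. The function name and the masking constants are hypothetical; the point is that a v16i8 shuffle taking the low byte of every i16 lane of two v8i16 inputs is exactly PACKUSWB once the upper bytes are known zero (PACKSSWB is the signed-saturation analogue).

  ; Hypothetical sketch: with the high byte of each i16 lane cleared, this
  ; shuffle of the two (bitcast) inputs is equivalent to PACKUSWB(%a, %b).
  define <16 x i8> @pack_as_shuffle(<8 x i16> %a, <8 x i16> %b) {
    %am = and <8 x i16> %a, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
    %bm = and <8 x i16> %b, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
    %ac = bitcast <8 x i16> %am to <16 x i8>
    %bc = bitcast <8 x i16> %bm to <16 x i8>
    %r = shufflevector <16 x i8> %ac, <16 x i8> %bc,
                       <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14,
                                   i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
    ret <16 x i8> %r
  }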

test/CodeGen/X86/avg.ll

Lines changed: 23 additions & 37 deletions
@@ -2747,24 +2747,19 @@ define void @avg_v16i16_const(<16 x i16>* %a) nounwind {
 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [5,6,7,8]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,2,3,4]
 ; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,3,4]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [5,6,7,8]
 ; AVX1-NEXT: vpaddd %xmm5, %xmm2, %xmm2
 ; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT: vpaddd %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
 ; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $1, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
-; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpsrld $1, %xmm2, %xmm1
+; AVX1-NEXT: vpsrld $1, %xmm3, %xmm2
+; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovups %ymm0, (%rax)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -2867,41 +2862,32 @@ define void @avg_v32i16_const(<32 x i16>* %a) nounwind {
 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm5 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm6 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm7 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [5,6,7,8]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [1,2,3,4]
 ; AVX1-NEXT: vpaddd %xmm0, %xmm7, %xmm9
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [1,2,3,4]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [5,6,7,8]
 ; AVX1-NEXT: vpaddd %xmm7, %xmm6, %xmm6
 ; AVX1-NEXT: vpaddd %xmm0, %xmm5, %xmm5
 ; AVX1-NEXT: vpaddd %xmm7, %xmm4, %xmm4
 ; AVX1-NEXT: vpaddd %xmm0, %xmm3, %xmm3
 ; AVX1-NEXT: vpaddd %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT: vpaddd %xmm7, %xmm8, %xmm1
-; AVX1-NEXT: vpsrld $1, %xmm1, %xmm8
+; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
 ; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $1, %xmm3, %xmm3
-; AVX1-NEXT: vpsrld $1, %xmm4, %xmm4
-; AVX1-NEXT: vpsrld $1, %xmm5, %xmm5
-; AVX1-NEXT: vpsrld $1, %xmm6, %xmm6
-; AVX1-NEXT: vpsrld $1, %xmm9, %xmm7
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm7 = xmm7[0],xmm1[1],xmm7[2],xmm1[3],xmm7[4],xmm1[5],xmm7[6],xmm1[7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0],xmm1[1],xmm6[2],xmm1[3],xmm6[4],xmm1[5],xmm6[6],xmm1[7]
-; AVX1-NEXT: vpackusdw %xmm7, %xmm6, %xmm6
-; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0],xmm1[1],xmm5[2],xmm1[3],xmm5[4],xmm1[5],xmm5[6],xmm1[7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm1[1],xmm4[2],xmm1[3],xmm4[4],xmm1[5],xmm4[6],xmm1[7]
-; AVX1-NEXT: vpackusdw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm1[1],xmm3[2],xmm1[3],xmm3[4],xmm1[5],xmm3[6],xmm1[7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3],xmm2[4],xmm1[5],xmm2[6],xmm1[7]
-; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm8[0],xmm1[1],xmm8[2],xmm1[3],xmm8[4],xmm1[5],xmm8[6],xmm1[7]
-; AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $1, %xmm2, %xmm1
+; AVX1-NEXT: vpsrld $1, %xmm3, %xmm2
+; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsrld $1, %xmm4, %xmm2
+; AVX1-NEXT: vpsrld $1, %xmm5, %xmm3
+; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsrld $1, %xmm6, %xmm3
+; AVX1-NEXT: vpsrld $1, %xmm9, %xmm4
+; AVX1-NEXT: vpackusdw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovups %ymm0, (%rax)
-; AVX1-NEXT: vmovups %ymm4, (%rax)
+; AVX1-NEXT: vmovups %ymm2, (%rax)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;

test/CodeGen/X86/trunc-ext-ld-st.ll

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ define void @load_8_i8(<8 x i8>* %A) {
 ; SSE41: # BB#0:
 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; SSE41-NEXT: paddb %xmm0, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: packuswb %xmm0, %xmm0
 ; SSE41-NEXT: movq %xmm0, (%rdi)
 ; SSE41-NEXT: retq
 %T = load <8 x i8>, <8 x i8>* %A
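
A minimal sketch of the IR this test presumably boils down to (reconstructed from the check lines above; the exact body is an assumption). The <8 x i8> add is legalized by zero-extending to words (pmovzxbw + paddb), so every lane stays below 256 and the truncating store can use packuswb instead of a pshufb mask.

  ; Reconstructed sketch (assumption), matching the SSE41 checks above.
  define void @load_8_i8(<8 x i8>* %A) {
    %T = load <8 x i8>, <8 x i8>* %A
    %S = add <8 x i8> %T, %T            ; widened to pmovzxbw + paddb
    store <8 x i8> %S, <8 x i8>* %A     ; upper bytes are zero, so packuswb + movq suffices
    ret void
  }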

test/CodeGen/X86/vector-trunc.ll

Lines changed: 5 additions & 7 deletions
@@ -1156,9 +1156,8 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
 ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT: vzeroupper
@@ -1228,8 +1227,8 @@ define void @trunc16i16_16i8_lshr(<16 x i16> %a) {
 ; AVX1: # BB#0: # %entry
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT: vzeroupper
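
Likewise, a rough sketch (an assumption, not copied from the test file) of the pattern behind trunc16i16_16i8_lshr: after the logical shift right by 8, every i16 element fits in 8 bits, which is what lets the upper half be truncated with vpackuswb above.

  ; Reconstructed sketch (assumption) of the lshr + trunc pattern.
  define void @trunc16i16_16i8_lshr(<16 x i16> %a) {
  entry:
    %srl = lshr <16 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8,
                                i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
    %t = trunc <16 x i16> %srl to <16 x i8>
    store <16 x i8> %t, <16 x i8>* undef, align 16   ; store target is a placeholder
    ret void
  }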
@@ -1958,11 +1957,10 @@ define void @PR34773(i16* %a0, i8* %a1) {
 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
 ; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm2
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm2
+; AVX1-NEXT: vpackuswb %xmm0, %xmm3, %xmm2
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovups %ymm0, (%rsi)
