Skip to content

Commit 2830363

Browse files
committed
[X86][SSE] combineVectorTruncation - enable (pre-SSSE3) vXi16->vXi8 truncation.
Shuffle combining can now handle this output, and by performing this early in combineVectorTruncation we avoid a scalarization that caused a regression on D87502.
1 parent ee1c04a commit 2830363

File tree

7 files changed, with 45 additions (+45) and 29 deletions (-29).

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45378,7 +45378,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
4537845378

4537945379
EVT OutSVT = OutVT.getVectorElementType();
4538045380
EVT InSVT = InVT.getVectorElementType();
45381-
if (!((InSVT == MVT::i32 || InSVT == MVT::i64) &&
45381+
if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
4538245382
(OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
4538345383
NumElems >= 8))
4538445384
return SDValue();

llvm/test/CodeGen/X86/masked_store_trunc.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4431,7 +4431,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
44314431
; SSE2-LABEL: truncstore_v32i16_v32i8:
44324432
; SSE2: # %bb.0:
44334433
; SSE2-NEXT: pxor %xmm7, %xmm7
4434-
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
4434+
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
44354435
; SSE2-NEXT: pand %xmm6, %xmm1
44364436
; SSE2-NEXT: pand %xmm6, %xmm0
44374437
; SSE2-NEXT: packuswb %xmm1, %xmm0
@@ -4648,7 +4648,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
46484648
; SSE4-LABEL: truncstore_v32i16_v32i8:
46494649
; SSE4: # %bb.0:
46504650
; SSE4-NEXT: pxor %xmm7, %xmm7
4651-
; SSE4-NEXT: movdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
4651+
; SSE4-NEXT: movdqa {{.*#+}} xmm6 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
46524652
; SSE4-NEXT: pand %xmm6, %xmm1
46534653
; SSE4-NEXT: pand %xmm6, %xmm0
46544654
; SSE4-NEXT: packuswb %xmm1, %xmm0
@@ -5634,7 +5634,7 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
56345634
; SSE2-LABEL: truncstore_v16i16_v16i8:
56355635
; SSE2: # %bb.0:
56365636
; SSE2-NEXT: pxor %xmm3, %xmm3
5637-
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
5637+
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
56385638
; SSE2-NEXT: pand %xmm4, %xmm1
56395639
; SSE2-NEXT: pand %xmm4, %xmm0
56405640
; SSE2-NEXT: packuswb %xmm1, %xmm0
@@ -5746,7 +5746,7 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
57465746
; SSE4-LABEL: truncstore_v16i16_v16i8:
57475747
; SSE4: # %bb.0:
57485748
; SSE4-NEXT: pxor %xmm3, %xmm3
5749-
; SSE4-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
5749+
; SSE4-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
57505750
; SSE4-NEXT: pand %xmm4, %xmm1
57515751
; SSE4-NEXT: pand %xmm4, %xmm0
57525752
; SSE4-NEXT: packuswb %xmm1, %xmm0

llvm/test/CodeGen/X86/vector-reduce-and-bool.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -358,7 +358,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
358358
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
359359
; SSE-LABEL: trunc_v16i16_v16i1:
360360
; SSE: # %bb.0:
361-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
361+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
362362
; SSE-NEXT: pand %xmm2, %xmm1
363363
; SSE-NEXT: pand %xmm2, %xmm0
364364
; SSE-NEXT: packuswb %xmm1, %xmm0

llvm/test/CodeGen/X86/vector-reduce-or-bool.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
345345
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
346346
; SSE-LABEL: trunc_v16i16_v16i1:
347347
; SSE: # %bb.0:
348-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
348+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
349349
; SSE-NEXT: pand %xmm2, %xmm1
350350
; SSE-NEXT: pand %xmm2, %xmm0
351351
; SSE-NEXT: packuswb %xmm1, %xmm0

llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -352,7 +352,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
352352
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
353353
; SSE-LABEL: trunc_v16i16_v16i1:
354354
; SSE: # %bb.0:
355-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
355+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
356356
; SSE-NEXT: pand %xmm2, %xmm1
357357
; SSE-NEXT: pand %xmm2, %xmm0
358358
; SSE-NEXT: packuswb %xmm1, %xmm0

llvm/test/CodeGen/X86/vector-trunc-math.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -382,7 +382,7 @@ define <16 x i8> @trunc_add_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
382382
; SSE: # %bb.0:
383383
; SSE-NEXT: paddw %xmm2, %xmm0
384384
; SSE-NEXT: paddw %xmm3, %xmm1
385-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
385+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
386386
; SSE-NEXT: pand %xmm2, %xmm1
387387
; SSE-NEXT: pand %xmm2, %xmm0
388388
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -798,7 +798,7 @@ define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
798798
define <16 x i8> @trunc_add_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
799799
; SSE-LABEL: trunc_add_const_v16i16_v16i8:
800800
; SSE: # %bb.0:
801-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
801+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
802802
; SSE-NEXT: pand %xmm2, %xmm1
803803
; SSE-NEXT: pand %xmm2, %xmm0
804804
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -1226,7 +1226,7 @@ define <16 x i8> @trunc_sub_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
12261226
; SSE: # %bb.0:
12271227
; SSE-NEXT: psubw %xmm2, %xmm0
12281228
; SSE-NEXT: psubw %xmm3, %xmm1
1229-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1229+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
12301230
; SSE-NEXT: pand %xmm2, %xmm1
12311231
; SSE-NEXT: pand %xmm2, %xmm0
12321232
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -1610,7 +1610,7 @@ define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
16101610
define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
16111611
; SSE-LABEL: trunc_sub_const_v16i16_v16i8:
16121612
; SSE: # %bb.0:
1613-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1613+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
16141614
; SSE-NEXT: pand %xmm2, %xmm1
16151615
; SSE-NEXT: pand %xmm2, %xmm0
16161616
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -2201,7 +2201,7 @@ define <16 x i8> @trunc_mul_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
22012201
; SSE: # %bb.0:
22022202
; SSE-NEXT: pmullw %xmm2, %xmm0
22032203
; SSE-NEXT: pmullw %xmm3, %xmm1
2204-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2204+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
22052205
; SSE-NEXT: pand %xmm2, %xmm1
22062206
; SSE-NEXT: pand %xmm2, %xmm0
22072207
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -2706,7 +2706,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
27062706
; SSE: # %bb.0:
27072707
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
27082708
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm1
2709-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2709+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
27102710
; SSE-NEXT: pand %xmm2, %xmm1
27112711
; SSE-NEXT: pand %xmm2, %xmm0
27122712
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -3106,7 +3106,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
31063106
define <16 x i8> @trunc_and_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
31073107
; SSE-LABEL: trunc_and_v16i16_v16i8:
31083108
; SSE: # %bb.0:
3109-
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
3109+
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
31103110
; SSE-NEXT: pand %xmm4, %xmm3
31113111
; SSE-NEXT: pand %xmm1, %xmm3
31123112
; SSE-NEXT: pand %xmm4, %xmm2
@@ -3471,7 +3471,7 @@ define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
34713471
define <16 x i8> @trunc_and_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
34723472
; SSE-LABEL: trunc_and_const_v16i16_v16i8:
34733473
; SSE: # %bb.0:
3474-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3474+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
34753475
; SSE-NEXT: pand %xmm2, %xmm1
34763476
; SSE-NEXT: pand %xmm2, %xmm0
34773477
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -3871,7 +3871,7 @@ define <16 x i8> @trunc_xor_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
38713871
; SSE: # %bb.0:
38723872
; SSE-NEXT: pxor %xmm2, %xmm0
38733873
; SSE-NEXT: pxor %xmm3, %xmm1
3874-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3874+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
38753875
; SSE-NEXT: pand %xmm2, %xmm1
38763876
; SSE-NEXT: pand %xmm2, %xmm0
38773877
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -4234,7 +4234,7 @@ define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
42344234
define <16 x i8> @trunc_xor_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
42354235
; SSE-LABEL: trunc_xor_const_v16i16_v16i8:
42364236
; SSE: # %bb.0:
4237-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
4237+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
42384238
; SSE-NEXT: pand %xmm2, %xmm1
42394239
; SSE-NEXT: pand %xmm2, %xmm0
42404240
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -4634,7 +4634,7 @@ define <16 x i8> @trunc_or_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind
46344634
; SSE: # %bb.0:
46354635
; SSE-NEXT: por %xmm2, %xmm0
46364636
; SSE-NEXT: por %xmm3, %xmm1
4637-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
4637+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
46384638
; SSE-NEXT: pand %xmm2, %xmm1
46394639
; SSE-NEXT: pand %xmm2, %xmm0
46404640
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -4997,7 +4997,7 @@ define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
49974997
define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
49984998
; SSE-LABEL: trunc_or_const_v16i16_v16i8:
49994999
; SSE: # %bb.0:
5000-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
5000+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
50015001
; SSE-NEXT: pand %xmm2, %xmm1
50025002
; SSE-NEXT: pand %xmm2, %xmm0
50035003
; SSE-NEXT: packuswb %xmm1, %xmm0

llvm/test/CodeGen/X86/vector-trunc.ll

Lines changed: 25 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1026,7 +1026,7 @@ entry:
10261026
define void @trunc16i16_16i8(<16 x i16> %a) {
10271027
; SSE-LABEL: trunc16i16_16i8:
10281028
; SSE: # %bb.0: # %entry
1029-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1029+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
10301030
; SSE-NEXT: pand %xmm2, %xmm1
10311031
; SSE-NEXT: pand %xmm2, %xmm0
10321032
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -1217,7 +1217,7 @@ entry:
12171217
define void @trunc32i16_32i8(<32 x i16> %a) {
12181218
; SSE-LABEL: trunc32i16_32i8:
12191219
; SSE: # %bb.0: # %entry
1220-
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
1220+
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
12211221
; SSE-NEXT: pand %xmm4, %xmm1
12221222
; SSE-NEXT: pand %xmm4, %xmm0
12231223
; SSE-NEXT: packuswb %xmm1, %xmm0
@@ -1675,13 +1675,29 @@ entry:
16751675
}
16761676

16771677
define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
1678-
; SSE-LABEL: trunc2x8i16_16i8:
1679-
; SSE: # %bb.0: # %entry
1680-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1681-
; SSE-NEXT: pand %xmm2, %xmm1
1682-
; SSE-NEXT: pand %xmm2, %xmm0
1683-
; SSE-NEXT: packuswb %xmm1, %xmm0
1684-
; SSE-NEXT: retq
1678+
; SSE2-LABEL: trunc2x8i16_16i8:
1679+
; SSE2: # %bb.0: # %entry
1680+
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
1681+
; SSE2-NEXT: pand %xmm2, %xmm0
1682+
; SSE2-NEXT: pand %xmm2, %xmm1
1683+
; SSE2-NEXT: packuswb %xmm1, %xmm0
1684+
; SSE2-NEXT: retq
1685+
;
1686+
; SSSE3-LABEL: trunc2x8i16_16i8:
1687+
; SSSE3: # %bb.0: # %entry
1688+
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1689+
; SSSE3-NEXT: pand %xmm2, %xmm1
1690+
; SSSE3-NEXT: pand %xmm2, %xmm0
1691+
; SSSE3-NEXT: packuswb %xmm1, %xmm0
1692+
; SSSE3-NEXT: retq
1693+
;
1694+
; SSE41-LABEL: trunc2x8i16_16i8:
1695+
; SSE41: # %bb.0: # %entry
1696+
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1697+
; SSE41-NEXT: pand %xmm2, %xmm1
1698+
; SSE41-NEXT: pand %xmm2, %xmm0
1699+
; SSE41-NEXT: packuswb %xmm1, %xmm0
1700+
; SSE41-NEXT: retq
16851701
;
16861702
; AVX-LABEL: trunc2x8i16_16i8:
16871703
; AVX: # %bb.0: # %entry

0 commit comments

Comments
 (0)