Commit aa3097d

RKSimon authored and github-actions[bot] committed
Automerge: [X86] SimplifyDemandedBitsForTargetNode - PCMPGT(0,X) only demands the signbit of X (#163981)
PCMPGT(0,X) is a sign-splat pattern - we only need the signbit of X. The vector-compress.ll simplification is a side effect of the inner SimplifyDemandedBits call passing through the DemandedElts mask, only demanding the lowest 4 elements of a legalised v16i8 type.
2 parents a6d20c5 + 908b116 commit aa3097d
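
As a quick illustration of the sign-splat identity the patch relies on (a standalone sketch, not code from LLVM): each byte of PCMPGT(0,X) is all-ones exactly when that byte of X is negative, i.e. it equals an arithmetic shift right by 7, so bits below the sign bit of X can never affect the result.

#include <cassert>
#include <cstdint>

// icmp sgt(0, x) on one i8 element: all-ones if x is negative, else zero.
static uint8_t pcmpgtb_zero(int8_t x) { return 0 > x ? 0xFF : 0x00; }

// ashr(x, 7): splats the sign bit across the byte.
static uint8_t sign_splat(int8_t x) { return static_cast<uint8_t>(x >> 7); }

int main() {
  for (int v = -128; v <= 127; ++v) {
    int8_t x = static_cast<int8_t>(v);
    // Clearing everything but the sign bit leaves the compare unchanged.
    int8_t sign_only = static_cast<int8_t>(x & 0x80);
    assert(pcmpgtb_zero(x) == sign_splat(x));
    assert(pcmpgtb_zero(x) == pcmpgtb_zero(sign_only));
  }
  return 0;
}

This is why SimplifyDemandedBits can ask the compared operand for its sign bit only, which in turn lets later combines drop instructions that only touch the lower bits, as the test diffs below show.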

File tree: 5 files changed, 19 additions & 85 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 4 deletions
@@ -44813,10 +44813,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
   }
   case X86ISD::PCMPGT:
     // icmp sgt(0, R) == ashr(R, BitWidth-1).
-    // iff we only need the sign bit then we can use R directly.
-    if (OriginalDemandedBits.isSignMask() &&
-        ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
-      return TLO.CombineTo(Op, Op.getOperand(1));
+    if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) {
+      // iff we only need the signbit then we can use R directly.
+      if (OriginalDemandedBits.isSignMask())
+        return TLO.CombineTo(Op, Op.getOperand(1));
+      // otherwise we just need R's signbit for the comparison.
+      APInt SignMask = APInt::getSignMask(BitWidth);
+      if (SimplifyDemandedBits(Op.getOperand(1), SignMask, OriginalDemandedElts,
+                               Known, TLO, Depth + 1))
+        return true;
+    }
     break;
   case X86ISD::MOVMSK: {
     SDValue Src = Op.getOperand(0);

llvm/test/CodeGen/X86/combine-umax.ll

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
 define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
 ; SSE2-LABEL: test_v16i8_demandedbits:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pmaxub %xmm1, %xmm0
+; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm3
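
One way to read the pmaxub -> por change above (an illustrative sketch under the same demanded-bits reasoning, not part of the patch): the following pcmpgtb against zero only demands each byte's sign bit, and the sign bit of an unsigned max is simply the OR of the operands' sign bits.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Exhaustive check over all byte pairs: the sign bit of umax(a, b)
  // is set iff at least one operand is >= 0x80, i.e. it equals the
  // sign bit of (a | b).
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      uint8_t mx = static_cast<uint8_t>(std::max(a, b));
      uint8_t orv = static_cast<uint8_t>(a | b);
      assert((mx & 0x80) == (orv & 0x80));
    }
  return 0;
}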

llvm/test/CodeGen/X86/combine-umin.ll

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,7 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
 define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
 ; SSE2-LABEL: test_v16i8_demandedbits:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pminub %xmm1, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm3
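
The unsigned min case above is the dual (again an illustrative sketch, not part of the patch): the sign bit of umin(a, b) is set only when both operands have theirs set, which is the AND of the sign bits, hence pminub relaxes to pand.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Exhaustive check over all byte pairs: the sign bit of umin(a, b)
  // is set iff both operands are >= 0x80, i.e. it equals the sign bit
  // of (a & b).
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      uint8_t mn = static_cast<uint8_t>(std::min(a, b));
      uint8_t andv = static_cast<uint8_t>(a & b);
      assert((mn & 0x80) == (andv & 0x80));
    }
  return 0;
}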

llvm/test/CodeGen/X86/vector-compress.ll

Lines changed: 1 addition & 69 deletions
@@ -1090,7 +1090,6 @@ define <16 x i8> @test_compress_v16i8(<16 x i8> %vec, <16 x i1> %mask, <16 x i8>
 ; AVX2-NEXT:    pushq %r12
 ; AVX2-NEXT:    pushq %rbx
 ; AVX2-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT:    vpcmpgtb %xmm1, %xmm3, %xmm1
 ; AVX2-NEXT:    vmovaps %xmm2, -{{[0-9]+}}(%rsp)
@@ -1335,7 +1334,6 @@ define <32 x i8> @test_compress_v32i8(<32 x i8> %vec, <32 x i1> %mask, <32 x i8>
 ; AVX2-NEXT:    andq $-32, %rsp
 ; AVX2-NEXT:    subq $64, %rsp
 ; AVX2-NEXT:    vpsllw $7, %ymm1, %ymm1
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm3, %ymm3
 ; AVX2-NEXT:    vmovaps %ymm2, (%rsp)
@@ -4733,7 +4731,6 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
 ; AVX2-NEXT:    vpextrb $0, %xmm0, -{{[0-9]+}}(%rsp)
@@ -4751,72 +4748,7 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind {
 ; AVX2-NEXT:    vpextrb $3, %xmm1, %ecx
 ; AVX2-NEXT:    andl $1, %ecx
 ; AVX2-NEXT:    addq %rax, %rcx
-; AVX2-NEXT:    vpextrb $4, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    addq %rcx, %rax
-; AVX2-NEXT:    vpextrb $5, %xmm1, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    addq %rax, %rcx
-; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT:    andl $15, %eax
-; AVX2-NEXT:    vpextrb $5, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    addq %rcx, %rax
-; AVX2-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpextrb $6, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT:    vpextrb $7, %xmm1, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    addq %rax, %rcx
-; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT:    andl $15, %eax
-; AVX2-NEXT:    vpextrb $7, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    addq %rcx, %rax
-; AVX2-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpextrb $8, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT:    vpextrb $9, %xmm1, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    addq %rax, %rcx
-; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT:    andl $15, %eax
-; AVX2-NEXT:    vpextrb $9, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    addq %rcx, %rax
-; AVX2-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpextrb $10, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT:    vpextrb $11, %xmm1, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    addq %rax, %rcx
-; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT:    andl $15, %eax
-; AVX2-NEXT:    vpextrb $11, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    addq %rcx, %rax
-; AVX2-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpextrb $12, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT:    vpextrb $13, %xmm1, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    addq %rax, %rcx
-; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT:    andl $15, %eax
-; AVX2-NEXT:    vpextrb $13, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    addl %ecx, %eax
-; AVX2-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vpextrb $14, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT:    andl $15, %eax
-; AVX2-NEXT:    vpextrb $15, %xmm0, -24(%rsp,%rax)
+; AVX2-NEXT:    vpextrb $15, %xmm0, -24(%rsp,%rcx)
 ; AVX2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
 ; AVX2-NEXT:    retq
 ;
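
The dropped vpand of the 0x80 splat in the hunks above follows the same principle (illustrative sketch, not from the patch): after vpsllw $7 the mask bit already sits in each byte's sign position, and pcmpgtb against zero reads only that bit, so pre-clearing the low seven bits is redundant.

#include <cassert>
#include <cstdint>

int main() {
  // Model one byte of the vector after vpsllw $7: the mask bit sits in
  // bit 7, while bits 6..0 may hold junk shifted in from the neighbouring
  // byte of the same 16-bit word.
  for (int maskbit = 0; maskbit <= 1; ++maskbit)
    for (int junk = 0; junk < 128; ++junk) {
      uint8_t byte = static_cast<uint8_t>((maskbit << 7) | junk);
      uint8_t masked = byte & 0x80; // the vpand that the patch removes
      // pcmpgtb against zero only reads the sign bit, so the result is
      // the same with or without the preceding AND.
      assert((static_cast<int8_t>(byte) < 0) ==
             (static_cast<int8_t>(masked) < 0));
    }
  return 0;
}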

llvm/test/CodeGen/X86/vselect-avx.ll

Lines changed: 6 additions & 10 deletions
@@ -151,23 +151,19 @@ define <32 x i8> @PR22706(<32 x i1> %x) {
 ; AVX1:       ## %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; AVX1-NEXT:    vpaddb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpaddb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: PR22706:
 ; AVX2:       ## %bb.0:
 ; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0