Skip to content

Commit 57b0194

Browse files
committed
[X86] IsNOT - fold NOT(PCMPGT(C, X)) -> PCMPGT(X, C-1)
To invert the result, we can profitably commute a PCMPGT node if the LHS is a constant C, provided C > min_signed_value: https://alive2.llvm.org/ce/z/LxcPqm. This allows the constant to fold and helps reduce register pressure. Fixes #67347.
1 parent d339d8f commit 57b0194

File tree

8 files changed

+399
-571
lines changed

8 files changed

+399
-571
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4881,9 +4881,10 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
48814881
return true;
48824882
}
48834883

4884-
// Match (xor X, -1) -> X.
4885-
// Match extract_subvector(xor X, -1) -> extract_subvector(X).
4886-
// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
4884+
// Match not(xor X, -1) -> X.
4885+
// Match not(pcmpgt(C, X)) -> pcmpgt(X, C - 1).
4886+
// Match not(extract_subvector(xor X, -1)) -> extract_subvector(X).
4887+
// Match not(concat_vectors(xor X, -1, xor Y, -1)) -> concat_vectors(X, Y).
48874888
static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
48884889
V = peekThroughBitcasts(V);
48894890
if (V.getOpcode() == ISD::XOR &&
@@ -4898,6 +4899,29 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
48984899
Not, V.getOperand(1));
48994900
}
49004901
}
4902+
if (V.getOpcode() == X86ISD::PCMPGT &&
4903+
!ISD::isBuildVectorAllZeros(V.getOperand(0).getNode()) &&
4904+
!ISD::isBuildVectorAllOnes(V.getOperand(0).getNode()) &&
4905+
V.getOperand(0).hasOneUse()) {
4906+
APInt UndefElts;
4907+
SmallVector<APInt> EltBits;
4908+
if (getTargetConstantBitsFromNode(V.getOperand(0),
4909+
V.getScalarValueSizeInBits(), UndefElts,
4910+
EltBits)) {
4911+
// Don't fold min_signed_value -> (min_signed_value - 1)
4912+
bool MinSigned = false;
4913+
for (APInt &Elt : EltBits) {
4914+
MinSigned |= Elt.isMinSignedValue();
4915+
Elt -= 1;
4916+
}
4917+
if (!MinSigned) {
4918+
SDLoc DL(V);
4919+
MVT VT = V.getSimpleValueType();
4920+
return DAG.getNode(X86ISD::PCMPGT, DL, VT, V.getOperand(1),
4921+
getConstVector(EltBits, UndefElts, VT, DAG, DL));
4922+
}
4923+
}
4924+
}
49014925
SmallVector<SDValue, 2> CatOps;
49024926
if (collectConcatOps(V.getNode(), CatOps, DAG)) {
49034927
for (SDValue &CatOp : CatOps) {

llvm/test/CodeGen/X86/fpclamptosat_vec.ll

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,11 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
7676
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
7777
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
7878
; CHECK-NEXT: pxor %xmm2, %xmm0
79-
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
80-
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
81-
; CHECK-NEXT: pcmpgtd %xmm1, %xmm3
82-
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
83-
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
84-
; CHECK-NEXT: pand %xmm3, %xmm0
79+
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
80+
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
81+
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
82+
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
83+
; CHECK-NEXT: pandn %xmm1, %xmm0
8584
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
8685
; CHECK-NEXT: pxor %xmm0, %xmm1
8786
; CHECK-NEXT: pand %xmm2, %xmm0
@@ -731,12 +730,11 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
731730
; CHECK-NEXT: orpd %xmm1, %xmm0
732731
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
733732
; CHECK-NEXT: xorpd %xmm0, %xmm1
734-
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = <2147549183,2147549183,u,u>
735-
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
736-
; CHECK-NEXT: andpd %xmm2, %xmm0
737-
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
738-
; CHECK-NEXT: pxor %xmm2, %xmm1
739-
; CHECK-NEXT: por %xmm0, %xmm1
733+
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
734+
; CHECK-NEXT: movdqa %xmm1, %xmm2
735+
; CHECK-NEXT: pandn %xmm0, %xmm2
736+
; CHECK-NEXT: psrld $16, %xmm1
737+
; CHECK-NEXT: por %xmm2, %xmm1
740738
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
741739
; CHECK-NEXT: retq
742740
entry:
@@ -801,12 +799,11 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
801799
; CHECK-NEXT: por %xmm1, %xmm0
802800
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
803801
; CHECK-NEXT: pxor %xmm0, %xmm1
804-
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183]
805-
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
806-
; CHECK-NEXT: pand %xmm2, %xmm0
807-
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
808-
; CHECK-NEXT: pxor %xmm2, %xmm1
809-
; CHECK-NEXT: por %xmm0, %xmm1
802+
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
803+
; CHECK-NEXT: movdqa %xmm1, %xmm2
804+
; CHECK-NEXT: pandn %xmm0, %xmm2
805+
; CHECK-NEXT: psrld $16, %xmm1
806+
; CHECK-NEXT: por %xmm2, %xmm1
810807
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
811808
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
812809
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1653,12 +1650,11 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
16531650
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
16541651
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
16551652
; CHECK-NEXT: pxor %xmm2, %xmm0
1656-
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
1657-
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
1658-
; CHECK-NEXT: pcmpgtd %xmm1, %xmm3
1659-
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1660-
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1661-
; CHECK-NEXT: pand %xmm3, %xmm0
1653+
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1654+
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1655+
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
1656+
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1657+
; CHECK-NEXT: pandn %xmm1, %xmm0
16621658
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
16631659
; CHECK-NEXT: pxor %xmm0, %xmm1
16641660
; CHECK-NEXT: pand %xmm2, %xmm0
@@ -2293,12 +2289,11 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
22932289
; CHECK-NEXT: orpd %xmm1, %xmm0
22942290
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
22952291
; CHECK-NEXT: xorpd %xmm0, %xmm1
2296-
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = <2147549183,2147549183,u,u>
2297-
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
2298-
; CHECK-NEXT: andpd %xmm2, %xmm0
2299-
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
2300-
; CHECK-NEXT: pxor %xmm2, %xmm1
2301-
; CHECK-NEXT: por %xmm0, %xmm1
2292+
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2293+
; CHECK-NEXT: movdqa %xmm1, %xmm2
2294+
; CHECK-NEXT: pandn %xmm0, %xmm2
2295+
; CHECK-NEXT: psrld $16, %xmm1
2296+
; CHECK-NEXT: por %xmm2, %xmm1
23022297
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
23032298
; CHECK-NEXT: retq
23042299
entry:
@@ -2358,12 +2353,11 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
23582353
; CHECK-NEXT: por %xmm1, %xmm0
23592354
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
23602355
; CHECK-NEXT: pxor %xmm0, %xmm1
2361-
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183]
2362-
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
2363-
; CHECK-NEXT: pand %xmm2, %xmm0
2364-
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
2365-
; CHECK-NEXT: pxor %xmm2, %xmm1
2366-
; CHECK-NEXT: por %xmm0, %xmm1
2356+
; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2357+
; CHECK-NEXT: movdqa %xmm1, %xmm2
2358+
; CHECK-NEXT: pandn %xmm0, %xmm2
2359+
; CHECK-NEXT: psrld $16, %xmm1
2360+
; CHECK-NEXT: por %xmm2, %xmm1
23672361
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
23682362
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
23692363
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]

llvm/test/CodeGen/X86/icmp-pow2-diff.ll

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -215,29 +215,28 @@ define <16 x i1> @andnot_ne_v16i8_fail_max_not_n1(<16 x i8> %x) nounwind {
215215
; AVX512-LABEL: andnot_ne_v16i8_fail_max_not_n1:
216216
; AVX512: # %bb.0:
217217
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
218-
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
219-
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
220-
; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
221-
; AVX512-NEXT: vpandn %xmm0, %xmm1, %xmm0
218+
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
219+
; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
220+
; AVX512-NEXT: vpternlogq $54, %xmm2, %xmm1, %xmm0
222221
; AVX512-NEXT: retq
223222
;
224223
; AVX2-LABEL: andnot_ne_v16i8_fail_max_not_n1:
225224
; AVX2: # %bb.0:
226225
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
227-
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
228-
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
229-
; AVX2-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
230-
; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
226+
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
227+
; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
228+
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
229+
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
231230
; AVX2-NEXT: retq
232231
;
233232
; SSE-LABEL: andnot_ne_v16i8_fail_max_not_n1:
234233
; SSE: # %bb.0:
235234
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
236-
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
237-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
238-
; SSE-NEXT: pcmpgtb %xmm0, %xmm2
239-
; SSE-NEXT: pandn %xmm2, %xmm1
240-
; SSE-NEXT: movdqa %xmm1, %xmm0
235+
; SSE-NEXT: movdqa %xmm0, %xmm2
236+
; SSE-NEXT: pcmpeqb %xmm1, %xmm2
237+
; SSE-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
238+
; SSE-NEXT: por %xmm2, %xmm0
239+
; SSE-NEXT: pxor %xmm1, %xmm0
241240
; SSE-NEXT: retq
242241
%cmp1 = icmp ne <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
243242
%cmp2 = icmp ne <16 x i8> %x, <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>

0 commit comments

Comments
 (0)