Skip to content

Commit d014f09

Browse files
committed
Fix failed UTs and resolve comments.
1 parent b45efac commit d014f09

File tree

3 files changed

+65
-26
lines changed

3 files changed

+65
-26
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,8 @@ namespace {
396396
bool PromoteLoad(SDValue Op);
397397

398398
SDValue foldShiftToAvg(SDNode *N);
399+
// Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
400+
SDValue foldBitwiseOpWithNeg(SDNode *N);
399401

400402
SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
401403
SDValue RHS, SDValue True, SDValue False,
@@ -7529,11 +7531,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
75297531
DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
75307532

75317533
// Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7532-
if (sd_match(N, m_And(m_Value(X), m_Add(m_Value(NotY), m_Value(Z)))) &&
7533-
sd_match(NotY, m_Not(m_Value(Y))) &&
7534-
(TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7535-
return DAG.getNode(ISD::AND, DL, VT, X,
7536-
DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
7534+
// Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7535+
if (SDValue Folded = foldBitwiseOpWithNeg(N))
7536+
return Folded;
75377537

75387538
// Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
75397539
// If we are shifting down an extended sign bit, see if we can simplify
@@ -8212,6 +8212,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
82128212
}
82138213
}
82148214

8215+
// Fold (or X, (add (not Y), Z)) -> (or X, (not (sub Y, Z)))
8216+
// Fold (or X, (sub (not Y), Z)) -> (or X, (not (add Y, Z)))
8217+
if (SDValue Folded = foldBitwiseOpWithNeg(N))
8218+
return Folded;
8219+
82158220
// fold (or x, 0) -> x
82168221
if (isNullConstant(N1))
82178222
return N0;
@@ -9863,6 +9868,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
98639868
return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
98649869
N0.getOperand(1));
98659870
}
9871+
// Fold (xor X, (add (not Y), Z)) -> (xor X, (not (sub Y, Z)))
9872+
// Fold (xor X, (sub (not Y), Z)) -> (xor X, (not (add Y, Z)))
9873+
if (SDValue Folded = foldBitwiseOpWithNeg(N))
9874+
return Folded;
98669875

98679876
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
98689877
if (N0Opcode == N1.getOpcode())
@@ -11616,6 +11625,35 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
1161611625
return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
1161711626
}
1161811627

11628+
/// Rewrite a bitwise AND/OR/XOR node whose other operand is an add or sub
/// involving a NOT, so that the NOT ends up outermost on that operand:
///   (op X, (add (not Y), Z)) -> (op X, (not (sub Y, Z)))
///   (op X, (sub (not Y), Z)) -> (op X, (not (add Y, Z)))
/// These rely on the two's-complement identities ~Y + Z == ~(Y - Z) and
/// ~Y - Z == ~(Y + Z).
///
/// \param N the node to inspect; anything other than ISD::AND, ISD::OR or
///          ISD::XOR is rejected.
/// \returns the replacement node, or an empty SDValue when TLI.hasAndNot()
///          is false for N, the opcode is not handled, or neither pattern
///          matches.
SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N) {
11629+
// Only attempt the fold when the target hook accepts this node's result.
if (!TLI.hasAndNot(SDValue(N, 0)))
11630+
return SDValue();
11631+
11632+
unsigned Opc = N->getOpcode();
11633+
// Restrict the fold to the three bitwise logic opcodes.
if (Opc != ISD::AND && Opc != ISD::OR && Opc != ISD::XOR)
11634+
return SDValue();
11635+
11636+
// N1 is read only to obtain the value type used for every node built below.
SDValue N1 = N->getOperand(1);
11637+
EVT VT = N1.getValueType();
11638+
SDLoc DL(N);
11639+
SDValue X, Y, Z, NotY;
11640+
11641+
// Add form: (op X, (add (not Y), Z)) -> (op X, (not (sub Y, Z))).
// NOTE(review): NotY is bound by this match but never read on this path;
// unlike the sub form below there is no one-use check here - confirm intended.
if (sd_match(
11642+
N, m_c_BinOp(Opc, m_Value(X), m_Add(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
11643+
m_Value(Z)))))
11644+
return DAG.getNode(Opc, DL, VT, X,
11645+
DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
11646+
11647+
// Sub form: (op X, (sub (not Y), Z)) -> (op X, (not (add Y, Z))).
// Applied only when the matched (not Y) node has a single use; presumably
// this avoids keeping the old NOT alive next to the new one - TODO confirm.
if (sd_match(N, m_c_BinOp(Opc, m_Value(X),
11648+
m_Sub(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
11649+
m_Value(Z)))) &&
11650+
NotY->hasOneUse())
11651+
return DAG.getNode(Opc, DL, VT, X,
11652+
DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
11653+
11654+
// Neither pattern matched: signal "no change" to the caller.
return SDValue();
11655+
}
11656+
1161911657
/// Generate Min/Max node
1162011658
SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
1162111659
SDValue RHS, SDValue True,

llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -885,9 +885,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
885885
define i8 @test_not_cttz_i8(i8 %a) nounwind {
886886
; LA32R-LABEL: test_not_cttz_i8:
887887
; LA32R: # %bb.0:
888-
; LA32R-NEXT: nor $a1, $a0, $zero
889-
; LA32R-NEXT: addi.w $a1, $a1, -1
890-
; LA32R-NEXT: and $a0, $a0, $a1
888+
; LA32R-NEXT: addi.w $a1, $a0, 1
889+
; LA32R-NEXT: andn $a0, $a0, $a1
891890
; LA32R-NEXT: srli.w $a1, $a0, 1
892891
; LA32R-NEXT: andi $a1, $a1, 85
893892
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -921,9 +920,8 @@ define i8 @test_not_cttz_i8(i8 %a) nounwind {
921920
define i16 @test_not_cttz_i16(i16 %a) nounwind {
922921
; LA32R-LABEL: test_not_cttz_i16:
923922
; LA32R: # %bb.0:
924-
; LA32R-NEXT: nor $a1, $a0, $zero
925-
; LA32R-NEXT: addi.w $a1, $a1, -1
926-
; LA32R-NEXT: and $a0, $a0, $a1
923+
; LA32R-NEXT: addi.w $a1, $a0, 1
924+
; LA32R-NEXT: andn $a0, $a0, $a1
927925
; LA32R-NEXT: srli.w $a1, $a0, 1
928926
; LA32R-NEXT: lu12i.w $a2, 5
929927
; LA32R-NEXT: ori $a2, $a2, 1365

llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,20 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
7373
; X86-NEXT: xorl %esi, %esi
7474
; X86-NEXT: xorps %xmm3, %xmm3
7575
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
76-
; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
76+
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
7777
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
7878
; X86-NEXT: calll *%esi
7979
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
8080
; X86-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
8181
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
82-
; X86-NEXT: pxor %xmm1, %xmm1
83-
; X86-NEXT: psubd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
82+
; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
83+
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
8484
; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
85-
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
85+
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
8686
; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
87+
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
88+
; X86-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
89+
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
8790
; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
8891
; X86-NEXT: por %xmm1, %xmm0
8992
; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
@@ -108,10 +111,8 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
108111
; X64-NEXT: retq
109112
; X64-NEXT: LBB0_3: ## %forbody
110113
; X64-NEXT: pushq %rbx
111-
; X64-NEXT: subq $64, %rsp
112-
; X64-NEXT: xorps %xmm0, %xmm0
113-
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
114-
; X64-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
114+
; X64-NEXT: subq $48, %rsp
115+
; X64-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
115116
; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
116117
; X64-NEXT: cvttps2dq %xmm1, %xmm0
117118
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
@@ -162,17 +163,19 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
162163
; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
163164
; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
164165
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
166+
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
167+
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
165168
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
166-
; X64-NEXT: psubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
167-
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
168-
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
169-
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
169+
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
170170
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
171171
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
172-
; X64-NEXT: orps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
172+
; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
173173
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
174-
; X64-NEXT: xorps %xmm3, %xmm3
175-
; X64-NEXT: xorps %xmm4, %xmm4
174+
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
175+
; X64-NEXT: por %xmm1, %xmm0
176+
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
177+
; X64-NEXT: xorps %xmm3, %xmm3
178+
; X64-NEXT: xorps %xmm4, %xmm4
176179
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
177180
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
178181
; X64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload

0 commit comments

Comments
 (0)