Skip to content

Commit b7ede52

Browse files
committed
[RISCV] Optimize (and (icmp x, 0, eq), (icmp y, 0, eq)) utilizing zicond extension
%1 = icmp x, 0, eq; %2 = icmp y, 0, eq; %3 = and %1, %2. Originally lowered to: %1 = seqz x; %2 = seqz y; %3 = and %1, %2. With optimization: %1 = seqz x; %3 = czero.nez %1, y
1 parent 6ee8775 commit b7ede52

File tree

7 files changed

+80
-28
lines changed

7 files changed

+80
-28
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,8 +2449,8 @@ class LLVM_ABI TargetLoweringBase {
24492449
/// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
24502450
/// that it saves us from materializing N0 and N1 in an integer register.
24512451
/// Targets that are able to perform and/or on flags should return false here.
2452-
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
2453-
EVT VT) const {
2452+
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT,
2453+
SDNode *) const {
24542454
// If a target has multiple condition registers, then it likely has logical
24552455
// operations on those registers.
24562456
if (hasMultipleConditionRegisters())
@@ -2462,6 +2462,10 @@ class LLVM_ABI TargetLoweringBase {
24622462
Action != TypeSplitVector;
24632463
}
24642464

2465+
// Return true if the target has a conditional-zeroing instruction,
2466+
// i.e. one that can implement `select cond, x, 0` directly.
// The default implementation returns false; targets override it to opt in.
// When it returns true, foldBoolSelectToLogic in DAGCombiner does not rewrite
// `select Cond, T, 0` / `select Cond, 0, F` into AND form.
2467+
virtual bool hasConditionalZero() const { return false; }
2468+
24652469
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
24662470

24672471
/// Return true if a select of constants (select Cond, C1, C2) should be

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12050,7 +12050,9 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
1205012050

1205112051
// select Cond, T, Cond --> and Cond, freeze(T)
1205212052
// select Cond, T, 0 --> and Cond, freeze(T)
12053-
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12053+
// select Cond, T, 0 is a conditional zero
12054+
if (Cond == F || (!TLI.hasConditionalZero() &&
12055+
isNullOrNullSplat(F, /* AllowUndefs */ true)))
1205412056
return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
1205512057

1205612058
// select Cond, T, 1 --> or (not Cond), freeze(T)
@@ -12061,7 +12063,7 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
1206112063
}
1206212064

1206312065
// select Cond, 0, F --> and (not Cond), freeze(F)
12064-
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12066+
if (!TLI.hasConditionalZero() && isNullOrNullSplat(T, /* AllowUndefs */ true)) {
1206512067
SDValue NotCond =
1206612068
matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
1206712069
return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
@@ -12214,7 +12216,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
1221412216
// and we always transform to the left side if we know that we can further
1221512217
// optimize the combination of the conditions.
1221612218
bool normalizeToSequence =
12217-
TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
12219+
TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT, N);
1221812220
// select (and Cond0, Cond1), X, Y
1221912221
// -> select Cond0, (select Cond1, X, Y), Y
1222012222
if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28262,8 +28262,8 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
2826228262
return all_equal(ValueVTs);
2826328263
}
2826428264

28265-
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
28266-
EVT) const {
28265+
// AArch64 implementation of the select-normalization hook. All three
// parameters (context, value type, and the node being combined) are unnamed
// and ignored: the answer is unconditionally false, so the combiner is never
// asked to split select(N0&N1 / N0|N1, X, Y) into a sequence of selects.
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, EVT,
28266+
SDNode *) const {
2826728267
return false;
2826828268
}
2826928269

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -836,7 +836,8 @@ class AArch64TargetLowering : public TargetLowering {
836836
SmallVectorImpl<SDValue> &Results,
837837
SelectionDAG &DAG) const;
838838

839-
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
839+
/// Override of the select-normalization hook. The trailing SDNode * argument
/// is the node being combined; the out-of-line definition ignores every
/// argument and always returns false.
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT,
840+
SDNode *) const override;
840841

841842
void finalizeLowering(MachineFunction &MF) const override;
842843

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2688,6 +2688,31 @@ bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
26882688
(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
26892689
}
26902690

2691+
// Disable normalizing for most cases
2692+
// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
2693+
// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y))
2694+
// If Y == 0 and N0 == setcc(eqz || nez) -> czero (select(N1, X, 0), N0)
2695+
// Decide whether the DAG combiner should normalize the SELECT node N into a
// sequence of selects.
//
// Returns true only when all of the following hold:
//   * the subtarget has Zicond or XVentanaCondOps,
//   * N's condition (operand 0) is a SETCC whose condition code is SETEQ or
//     SETNE, and
//   * N's false value (operand 2) is the constant zero.
// Returns false in every other case, including on subtargets without a
// conditional-zero extension.
//
// N is dereferenced without a null check, so callers must pass a valid node.
// NOTE(review): VT is not used by this implementation.
bool RISCVTargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, EVT VT,
2696+
SDNode *N) const {
2697+
if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) {
2698+
// NOTE(review): the assert message below misspells the function name
// ("Too" instead of "To").
assert(
2699+
N->getOpcode() == ISD::SELECT &&
2700+
"shouldNormalizeTooSelectSequence() called with non-SELECT operation");
2701+
const SDValue &CondV = N->getOperand(0);
2702+
// Operand 2 of the SELECT is its false value; require it to be zero.
// NOTE(review): only the SETCC opcode and condition code are inspected; the
// SETCC's right-hand operand is not checked to be zero here, although the
// comment preceding this function talks about eqz/nez compares -- confirm
// this is intended.
if (CondV.getOpcode() == ISD::SETCC && isNullConstant(N->getOperand(2))) {
2703+
ISD::CondCode CondCode = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
2704+
if (CondCode == ISD::SETNE || CondCode == ISD::SETEQ) {
2705+
return true;
2706+
}
2707+
}
2708+
}
2709+
return false;
2710+
}
2711+
2712+
// Report whether this subtarget has a conditional-zero instruction: true when
// either the Zicond standard extension or the XVentanaCondOps vendor
// extension is available, false otherwise.
bool RISCVTargetLowering::hasConditionalZero() const {
2713+
return Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps();
2714+
}
2715+
26912716
bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
26922717
if (!ScalarTy.isSimple())
26932718
return false;
@@ -15731,6 +15756,35 @@ static SDValue performANDCombine(SDNode *N,
1573115756
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
1573215757
return V;
1573315758

15759+
if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) {
15760+
auto IsCzeroCompatible = [](const SDValue &Op0,
15761+
const SDValue &Op1) -> bool {
15762+
if (Op0.getValueType() == MVT::i1 && Op1.getOpcode() == ISD::SETCC &&
15763+
isNullConstant(Op1.getOperand(1))) {
15764+
ISD::CondCode CondCode = cast<CondCodeSDNode>(Op1.getOperand(2))->get();
15765+
return CondCode == ISD::SETNE || CondCode == ISD::SETEQ;
15766+
}
15767+
return false;
15768+
};
15769+
// (and (i1) f, (setcc c, 0, ne)) -> (select c, f, 0) -> (czero.nez f, c)
15770+
// (and (i1) f, (setcc c, 0, eq)) -> (select c, 0, f) -> (czero.eqz f, c)
15771+
// (and (setcc c, 0, ne), (i1) g) -> (select c, g, 0) -> (czero.nez g, c)
15772+
// (and (setcc c, 0, eq), (i1) g) -> (select c, 0, g) -> (czero.eqz g, c)
15773+
if (IsCzeroCompatible(N->getOperand(0), N->getOperand(1)) ||
15774+
IsCzeroCompatible(N->getOperand(1), N->getOperand(0))) {
15775+
const bool CzeroOp1 =
15776+
IsCzeroCompatible(N->getOperand(0), N->getOperand(1));
15777+
const SDValue &I1Op = CzeroOp1 ? N->getOperand(0) : N->getOperand(1);
15778+
const SDValue &SetCCOp = CzeroOp1 ? N->getOperand(1) : N->getOperand(0);
15779+
15780+
ISD::CondCode CondCode =
15781+
cast<CondCodeSDNode>(SetCCOp.getOperand(2))->get();
15782+
SDLoc DL(N);
15783+
const SDValue &Condition = SetCCOp.getOperand(0);
15784+
return DAG.getNode(ISD::SELECT, DL, MVT::i1, SetCCOp, I1Op, DAG.getConstant(0, DL, MVT::i1));
15785+
}
15786+
}
15787+
1573415788
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
1573515789
return V;
1573615790
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -598,13 +598,10 @@ class RISCVTargetLowering : public TargetLowering {
598598
/// this override can be removed.
599599
bool mergeStoresAfterLegalization(EVT VT) const override;
600600

601-
/// Disable normalizing
602-
/// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
603-
/// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y))
604-
/// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
605-
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
606-
return false;
607-
}
601+
/// Controls normalizing
/// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and the analogous
/// N0|N1 form. The out-of-line definition returns true only on subtargets
/// with Zicond or XVentanaCondOps when the SELECT node \p N has a SETEQ/SETNE
/// setcc as its condition and a zero false value; otherwise it returns false.
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT VT,
602+
SDNode *N) const override;
603+
604+
/// Returns true when the subtarget has Zicond or XVentanaCondOps (see the
/// out-of-line definition).
bool hasConditionalZero() const override;
608605

609606
/// Disables storing and loading vectors by default when there are function
610607
/// calls between the load and store, since these are more expensive than just

llvm/test/CodeGen/RISCV/zicond-opts.ll

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,14 @@ define i32 @icmp_and(i64 %x, i64 %y) {
88
; RV32ZICOND: # %bb.0:
99
; RV32ZICOND-NEXT: or a2, a2, a3
1010
; RV32ZICOND-NEXT: or a0, a0, a1
11-
; RV32ZICOND-NEXT: snez a1, a2
1211
; RV32ZICOND-NEXT: snez a0, a0
13-
; RV32ZICOND-NEXT: and a0, a0, a1
12+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
1413
; RV32ZICOND-NEXT: ret
1514
;
1615
; RV64ZICOND-LABEL: icmp_and:
1716
; RV64ZICOND: # %bb.0:
18-
; RV64ZICOND-NEXT: snez a1, a1
1917
; RV64ZICOND-NEXT: snez a0, a0
20-
; RV64ZICOND-NEXT: and a0, a0, a1
18+
; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
2119
; RV64ZICOND-NEXT: ret
2220
%3 = icmp ne i64 %y, 0
2321
%4 = icmp ne i64 %x, 0
@@ -32,21 +30,17 @@ define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
3230
; RV32ZICOND: # %bb.0:
3331
; RV32ZICOND-NEXT: or a2, a2, a3
3432
; RV32ZICOND-NEXT: or a0, a0, a1
35-
; RV32ZICOND-NEXT: or a4, a4, a5
3633
; RV32ZICOND-NEXT: snez a1, a2
37-
; RV32ZICOND-NEXT: snez a0, a0
38-
; RV32ZICOND-NEXT: and a0, a1, a0
39-
; RV32ZICOND-NEXT: snez a1, a4
40-
; RV32ZICOND-NEXT: and a0, a1, a0
34+
; RV32ZICOND-NEXT: czero.eqz a0, a1, a0
35+
; RV32ZICOND-NEXT: or a4, a4, a5
36+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
4137
; RV32ZICOND-NEXT: ret
4238
;
4339
; RV64ZICOND-LABEL: icmp_and_and:
4440
; RV64ZICOND: # %bb.0:
4541
; RV64ZICOND-NEXT: snez a1, a1
46-
; RV64ZICOND-NEXT: snez a0, a0
47-
; RV64ZICOND-NEXT: and a0, a1, a0
48-
; RV64ZICOND-NEXT: snez a1, a2
49-
; RV64ZICOND-NEXT: and a0, a1, a0
42+
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
43+
; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
5044
; RV64ZICOND-NEXT: ret
5145
%4 = icmp ne i64 %y, 0
5246
%5 = icmp ne i64 %x, 0

0 commit comments

Comments
 (0)