Skip to content

Commit 821a64f

Browse files
committed
[RISCV] Optimize (and (icmp x, 0, eq), (icmp y, 0, eq)) utilizing zicond extension
Given: %1 = icmp x, 0, eq; %2 = icmp y, 0, eq; %3 = and %1, %2. Originally lowered to: %1 = seqz x; %2 = seqz y; %3 = and %1, %2. With the optimization: %1 = seqz x; %3 = czero.nez %1, y
1 parent b67a9ba commit 821a64f

File tree

4 files changed

+82
-31
lines changed

4 files changed

+82
-31
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12202,7 +12202,9 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
1220212202

1220312203
// select Cond, T, Cond --> and Cond, freeze(T)
1220412204
// select Cond, T, 0 --> and Cond, freeze(T)
12205-
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12205+
// select Cond, T, 0 is a conditional zero
12206+
if (Cond == F || (!TLI.hasConditionalZero() &&
12207+
isNullOrNullSplat(F, /* AllowUndefs */ true)))
1220612208
return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
1220712209

1220812210
// select Cond, T, 1 --> or (not Cond), freeze(T)
@@ -12213,7 +12215,9 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
1221312215
}
1221412216

1221512217
// select Cond, 0, F --> and (not Cond), freeze(F)
12216-
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12218+
// select Cond, 0, F is a conditional zero
12219+
if (!TLI.hasConditionalZero() &&
12220+
isNullOrNullSplat(T, /* AllowUndefs */ true)) {
1221712221
SDValue NotCond =
1221812222
matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
1221912223
return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2763,6 +2763,47 @@ bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
27632763
(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
27642764
}
27652765

2766+
// Can the given operation be interchanged with a Zicond::CZERO operation
2767+
// Must be:
2768+
// - a SETCC instruction
2769+
// - Must compare a value for [in]equality against 0
2770+
static bool isCzeroCompatible(const SDValue Op) {
2771+
if (Op.getValueType() == MVT::i1 && Op.getOpcode() == ISD::SETCC &&
2772+
isNullConstant(Op.getOperand(1))) {
2773+
ISD::CondCode CondCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2774+
return CondCode == ISD::SETNE || CondCode == ISD::SETEQ;
2775+
}
2776+
return false;
2777+
}
2778+
2779+
// Disable normalizing for most cases
2780+
// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
2781+
// select(N0|N1, X, Y) => select(N0, Y, select(N1, X, Y))
2782+
// For select(N0, select(N1, X, Y), Y), if Y=0 and N0=setcc(eqz || nez):
2783+
// %N1 = setcc [any_cond] %A, %B
2784+
// %CZ = czero.eqz %N1, X
2785+
// %Res = czero.eqz %N0, %CZ
2786+
// ...
2787+
// But for select(N0&N1, X, Y):
2788+
// %N0 = setcc [eq/ne] %C, 0
2789+
// %N1 = setcc [any_cond] %A, %B
2790+
// %And = and %N0, %N1
2791+
// %Res = czero.eqz %And, %X
2792+
bool RISCVTargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, EVT VT,
2793+
SDNode *N) const {
2794+
if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) {
2795+
assert(
2796+
N->getOpcode() == ISD::SELECT &&
2797+
"shouldNormalizeToSelectSequence() called with non-SELECT operation");
2798+
const SDValue &CondV = N->getOperand(0);
2799+
const SDValue &TrueV = N->getOperand(1);
2800+
const SDValue &FalseV = N->getOperand(2);
2801+
if (CondV.hasOneUse() && isCzeroCompatible(CondV) && isNullConstant(FalseV))
2802+
return true;
2803+
}
2804+
return false;
2805+
}
2806+
27662807
bool RISCVTargetLowering::hasConditionalZero() const {
27672808
return Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps();
27682809
}
@@ -16121,6 +16162,25 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
1612116162
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
1612216163
}
1612316164

16165+
// Rewrites an AND that has an i1 "setcc x, 0, eq/ne" operand into a SELECT
// with a zero false value, so it can become a conditional-zero operation
// instead of a materialized compare followed by an AND:
//   (and f, (setcc c, 0, ne)) -> (select (setcc c, 0, ne), f, 0)
//                                 ; i.e. f if c != 0, else 0 (czero.eqz f, c)
//   (and f, (setcc c, 0, eq)) -> (select (setcc c, 0, eq), f, 0)
//                                 ; i.e. f if c == 0, else 0 (czero.nez f, c)
// The same applies with the AND operands swapped. When both operands are
// compatible setcc nodes, operand 1 is used as the select condition.
//
// Returns an empty SDValue when the subtarget has neither Zicond nor
// XVentanaCondOps, or when neither operand qualifies.
static SDValue reduceANDOfSetCC(SDNode *N, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  if (!Subtarget.hasStdExtZicond() && !Subtarget.hasVendorXVentanaCondOps())
    return SDValue();

  const SDValue LHS = N->getOperand(0);
  const SDValue RHS = N->getOperand(1);

  // Operand 1 is tried first; operand 0 is only examined when it fails.
  const bool CondIsRHS = isCzeroCompatible(RHS);
  if (!CondIsRHS && !isCzeroCompatible(LHS))
    return SDValue();

  const SDValue Cond = CondIsRHS ? RHS : LHS;
  const SDValue Kept = CondIsRHS ? LHS : RHS;

  // isCzeroCompatible only accepts i1 setcc nodes, so the select is built as
  // i1 as well.
  SDLoc DL(N);
  return DAG.getNode(ISD::SELECT, DL, MVT::i1, Cond, Kept,
                     DAG.getConstant(0, DL, MVT::i1));
}
16183+
1612416184
static SDValue reduceANDOfAtomicLoad(SDNode *N,
1612516185
TargetLowering::DAGCombinerInfo &DCI) {
1612616186
SelectionDAG &DAG = DCI.DAG;
@@ -16184,7 +16244,8 @@ static SDValue performANDCombine(SDNode *N,
1618416244

1618516245
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
1618616246
return V;
16187-
16247+
if (SDValue V = reduceANDOfSetCC(N, DAG, Subtarget))
16248+
return V;
1618816249
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
1618916250
return V;
1619016251
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -603,14 +603,8 @@ class RISCVTargetLowering : public TargetLowering {
603603
/// this override can be removed.
604604
bool mergeStoresAfterLegalization(EVT VT) const override;
605605

606-
/// Disable normalizing
607-
/// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
608-
/// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y))
609-
/// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
610-
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT,
611-
SDNode *N) const override {
612-
return false;
613-
}
606+
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT VT,
607+
SDNode *N) const override;
614608

615609
bool hasConditionalZero() const override;
616610

llvm/test/CodeGen/RISCV/zicond-opts.ll

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,14 @@ define i32 @icmp_and(i64 %x, i64 %y) {
88
; RV32ZICOND: # %bb.0:
99
; RV32ZICOND-NEXT: or a2, a2, a3
1010
; RV32ZICOND-NEXT: or a0, a0, a1
11-
; RV32ZICOND-NEXT: snez a1, a2
1211
; RV32ZICOND-NEXT: snez a0, a0
13-
; RV32ZICOND-NEXT: and a0, a0, a1
12+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
1413
; RV32ZICOND-NEXT: ret
1514
;
1615
; RV64ZICOND-LABEL: icmp_and:
1716
; RV64ZICOND: # %bb.0:
18-
; RV64ZICOND-NEXT: snez a1, a1
1917
; RV64ZICOND-NEXT: snez a0, a0
20-
; RV64ZICOND-NEXT: and a0, a0, a1
18+
; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
2119
; RV64ZICOND-NEXT: ret
2220
%3 = icmp ne i64 %y, 0
2321
%4 = icmp ne i64 %x, 0
@@ -32,20 +30,18 @@ define i32 @icmp_and_select(i64 %x, i64 %y, i32 %z) {
3230
; RV32ZICOND: # %bb.0:
3331
; RV32ZICOND-NEXT: sgtz a5, a3
3432
; RV32ZICOND-NEXT: snez a2, a2
35-
; RV32ZICOND-NEXT: or a0, a0, a1
36-
; RV32ZICOND-NEXT: czero.eqz a1, a5, a3
33+
; RV32ZICOND-NEXT: czero.eqz a5, a5, a3
3734
; RV32ZICOND-NEXT: czero.nez a2, a2, a3
38-
; RV32ZICOND-NEXT: or a1, a2, a1
39-
; RV32ZICOND-NEXT: snez a0, a0
40-
; RV32ZICOND-NEXT: and a0, a0, a1
35+
; RV32ZICOND-NEXT: or a2, a2, a5
36+
; RV32ZICOND-NEXT: or a0, a0, a1
37+
; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
4138
; RV32ZICOND-NEXT: czero.eqz a0, a4, a0
4239
; RV32ZICOND-NEXT: ret
4340
;
4441
; RV64ZICOND-LABEL: icmp_and_select:
4542
; RV64ZICOND: # %bb.0:
4643
; RV64ZICOND-NEXT: sgtz a1, a1
47-
; RV64ZICOND-NEXT: snez a0, a0
48-
; RV64ZICOND-NEXT: and a0, a0, a1
44+
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
4945
; RV64ZICOND-NEXT: czero.eqz a0, a2, a0
5046
; RV64ZICOND-NEXT: ret
5147
%3 = icmp sgt i64 %y, 0
@@ -61,21 +57,17 @@ define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
6157
; RV32ZICOND: # %bb.0:
6258
; RV32ZICOND-NEXT: or a2, a2, a3
6359
; RV32ZICOND-NEXT: or a0, a0, a1
64-
; RV32ZICOND-NEXT: or a4, a4, a5
6560
; RV32ZICOND-NEXT: snez a1, a2
66-
; RV32ZICOND-NEXT: snez a0, a0
67-
; RV32ZICOND-NEXT: and a0, a1, a0
68-
; RV32ZICOND-NEXT: snez a1, a4
69-
; RV32ZICOND-NEXT: and a0, a1, a0
61+
; RV32ZICOND-NEXT: czero.eqz a0, a1, a0
62+
; RV32ZICOND-NEXT: or a4, a4, a5
63+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
7064
; RV32ZICOND-NEXT: ret
7165
;
7266
; RV64ZICOND-LABEL: icmp_and_and:
7367
; RV64ZICOND: # %bb.0:
7468
; RV64ZICOND-NEXT: snez a1, a1
75-
; RV64ZICOND-NEXT: snez a0, a0
76-
; RV64ZICOND-NEXT: and a0, a1, a0
77-
; RV64ZICOND-NEXT: snez a1, a2
78-
; RV64ZICOND-NEXT: and a0, a1, a0
69+
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
70+
; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
7971
; RV64ZICOND-NEXT: ret
8072
%4 = icmp ne i64 %y, 0
8173
%5 = icmp ne i64 %x, 0

0 commit comments

Comments
 (0)