Skip to content

Commit e2a55b8

Browse files
authored
[LoongArch] Add custom lowering for BCOND and perform BR_CC combine (llvm#147885)
This patch attempts to optimize conditional branches by combining logical operations within the conditions. This enables the selection of more efficient branch instructions. For example, for integers, `blez x` can be used instead of `blt x, (ori, t, 1)`; for floating-point comparisons, dedicated floating-point branch instructions can be used to avoid moving the result to an integer register.
1 parent ffb756d commit e2a55b8

File tree

11 files changed

+246
-222
lines changed

11 files changed

+246
-222
lines changed

llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
// Pattern fragment that matches an XOR of $val with either -1 or 1.
// NOTE(review): presumably both forms are meant as the logical negation of a
// boolean value (the xor-with-1 form is only a negation when $val is known to
// be 0 or 1) -- confirm against the users of this fragment.
def NotBoolXor : PatFrags<(ops node:$val),
14+
[(xor node:$val, -1), (xor node:$val, 1)]>;
15+
1316
//===----------------------------------------------------------------------===//
1417
// LoongArch specific DAG Nodes.
1518
//===----------------------------------------------------------------------===//
@@ -22,6 +25,9 @@ def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
2225
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
2326
def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
2427

28+
// ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
29+
// comparisons to prevent recursive lowering.
30+
def loongarch_brcond : SDNode<"LoongArchISD::BRCOND", SDTBrcond, [SDNPHasChain]>;
2531
def loongarch_movgr2fr_w_la64
2632
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
2733
def loongarch_movfr2gr_s_la64
@@ -208,16 +214,18 @@ def : PatFPSetcc<SETUO, FCMP_CUN_S, FPR32>;
208214
def : PatFPSetcc<SETLT, FCMP_CLT_S, FPR32>;
209215

210216
multiclass PatFPBrcond<CondCode cc, LAInst CmpInst, RegisterClass RegTy> {
211-
def : Pat<(brcond (xor (GRLenVT (setcc RegTy:$fj, RegTy:$fk, cc)), -1),
212-
bb:$imm21),
217+
def : Pat<(loongarch_brcond (NotBoolXor (GRLenVT (setcc RegTy:$fj, RegTy:$fk, cc))),
218+
bb:$imm21),
213219
(BCEQZ (CmpInst RegTy:$fj, RegTy:$fk), bb:$imm21)>;
214-
def : Pat<(brcond (GRLenVT (setcc RegTy:$fj, RegTy:$fk, cc)), bb:$imm21),
220+
def : Pat<(loongarch_brcond (GRLenVT (setcc RegTy:$fj, RegTy:$fk, cc)), bb:$imm21),
215221
(BCNEZ (CmpInst RegTy:$fj, RegTy:$fk), bb:$imm21)>;
216222
}
217223

218224
defm : PatFPBrcond<SETOEQ, FCMP_CEQ_S, FPR32>;
225+
defm : PatFPBrcond<SETEQ , FCMP_CEQ_S, FPR32>;
219226
defm : PatFPBrcond<SETOLT, FCMP_CLT_S, FPR32>;
220227
defm : PatFPBrcond<SETOLE, FCMP_CLE_S, FPR32>;
228+
defm : PatFPBrcond<SETLE, FCMP_CLE_S, FPR32>;
221229
defm : PatFPBrcond<SETONE, FCMP_CNE_S, FPR32>;
222230
defm : PatFPBrcond<SETO, FCMP_COR_S, FPR32>;
223231
defm : PatFPBrcond<SETUEQ, FCMP_CUEQ_S, FPR32>;

llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,10 @@ def : PatFPSetcc<SETUO, FCMP_CUN_D, FPR64>;
184184
def : PatFPSetcc<SETLT, FCMP_CLT_D, FPR64>;
185185

186186
defm : PatFPBrcond<SETOEQ, FCMP_CEQ_D, FPR64>;
187+
defm : PatFPBrcond<SETEQ, FCMP_CEQ_D, FPR64>;
187188
defm : PatFPBrcond<SETOLT, FCMP_CLT_D, FPR64>;
188189
defm : PatFPBrcond<SETOLE, FCMP_CLE_D, FPR64>;
190+
defm : PatFPBrcond<SETLE, FCMP_CLE_D, FPR64>;
189191
defm : PatFPBrcond<SETONE, FCMP_CNE_D, FPR64>;
190192
defm : PatFPBrcond<SETO, FCMP_COR_D, FPR64>;
191193
defm : PatFPBrcond<SETUEQ, FCMP_CUEQ_D, FPR64>;

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
127127

128128
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
129129
setOperationAction(ISD::BR_CC, GRLenVT, Expand);
130+
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
130131
setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
131132
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
132133
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
@@ -516,6 +517,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
516517
return lowerPREFETCH(Op, DAG);
517518
case ISD::SELECT:
518519
return lowerSELECT(Op, DAG);
520+
case ISD::BRCOND:
521+
return lowerBRCOND(Op, DAG);
519522
case ISD::FP_TO_FP16:
520523
return lowerFP_TO_FP16(Op, DAG);
521524
case ISD::FP16_TO_FP:
@@ -913,6 +916,35 @@ SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
913916
return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
914917
}
915918

919+
// Custom-lower an ISD::BRCOND node into a LoongArch-specific branch node.
//
// Operand 1 of Op is the branch condition; operands 0 and 2 are forwarded
// unchanged as the first and last operands of the node that is built.
// Three cases are distinguished:
//   * the condition is a SETCC whose compared operands have type GRLenVT:
//     the comparison is folded into a LoongArchISD::BR_CC node;
//   * the condition is a SETCC whose compared operands are floating point:
//     the SETCC is kept as-is and wrapped in a LoongArchISD::BRCOND node;
//   * anything else (including a SETCC on any other operand type): the
//     condition value itself is tested with "!= 0" in a LoongArchISD::BR_CC.
SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
920+
SelectionDAG &DAG) const {
921+
SDValue CondV = Op.getOperand(1);
922+
SDLoc DL(Op);
923+
MVT GRLenVT = Subtarget.getGRLenVT();
924+
925+
if (CondV.getOpcode() == ISD::SETCC) {
926+
if (CondV.getOperand(0).getValueType() == GRLenVT) {
927+
SDValue LHS = CondV.getOperand(0);
928+
SDValue RHS = CondV.getOperand(1);
929+
ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
930+
931+
// NOTE(review): translateSetCCForBranch is defined outside this view;
// presumably it rewrites LHS/RHS/CCVal in place into a form the branch
// instructions support -- confirm. Its results are what BR_CC receives.
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
932+
933+
SDValue TargetCC = DAG.getCondCode(CCVal);
934+
return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
935+
Op.getOperand(0), LHS, RHS, TargetCC,
936+
Op.getOperand(2));
937+
} else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
938+
// Floating-point compare: keep the SETCC as the condition of a
// target-specific BRCOND node instead of comparing its result to zero.
return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
939+
Op.getOperand(0), CondV, Op.getOperand(2));
940+
}
941+
}
942+
943+
// Generic fallback: branch if the condition value is not equal to zero.
return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
944+
Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
945+
DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
946+
}
947+
916948
SDValue
917949
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
918950
SelectionDAG &DAG) const {
@@ -5224,6 +5256,71 @@ static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
52245256
Src.getOperand(0));
52255257
}
52265258

5259+
// Perform combines for BR_CC conditions.
//
// LHS, RHS and CC describe the comparison "LHS <CC> RHS" and are updated in
// place. Returns true if one of the folds below fired (the caller should then
// rebuild its node from the updated values) and false if nothing was changed.
// Only the first matching fold is applied per call.
5260+
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5261+
SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5262+
ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5263+
5264+
// An arithmetic right shift preserves the sign bit, so for a sign test
5265+
// against zero the shift can be omitted.
5266+
// Fold setlt (sra X, N), 0 -> setlt X, 0 and
5267+
// setge (sra X, N), 0 -> setge X, 0
5268+
if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5269+
LHS.getOpcode() == ISD::SRA) {
5270+
// Only LHS changes here; RHS (zero) and CC stay as they were.
LHS = LHS.getOperand(0);
5271+
return true;
5272+
}
5273+
5274+
// The remaining folds are only attempted for integer equality conditions.
if (!ISD::isIntEqualitySetCC(CCVal))
5275+
return false;
5276+
5277+
// Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5278+
// Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5279+
if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5280+
LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5281+
// If we're looking for eq 0 instead of ne 0, we need to invert the
5282+
// condition.
5283+
bool Invert = CCVal == ISD::SETEQ;
5284+
CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5285+
if (Invert)
5286+
CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5287+
5288+
// RHS must be read from the setcc before LHS is overwritten.
RHS = LHS.getOperand(1);
5289+
LHS = LHS.getOperand(0);
5290+
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5291+
5292+
CC = DAG.getCondCode(CCVal);
5293+
return true;
5294+
}
5295+
5296+
// (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5297+
// This can occur when legalizing some floating point comparisons.
// The condition code is inverted whichever equality code it currently is.
5298+
// Mask covers every bit of LHS except bit 0.
APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5299+
if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5300+
CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5301+
CC = DAG.getCondCode(CCVal);
5302+
RHS = DAG.getConstant(0, DL, LHS.getValueType());
5303+
return true;
5304+
}
5305+
5306+
return false;
5307+
}
5308+
5309+
// DAG combine for a BR_CC node N.
//
// Reads operands 1, 2 and 3 of N as the comparison LHS, RHS and condition
// code and hands them to combine_CC. If combine_CC reports a change, a new
// LoongArchISD::BR_CC node is built from the updated values, with operands 0
// and 4 of N carried over unchanged. Otherwise an empty SDValue is returned.
// DCI is not used in this function.
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5310+
TargetLowering::DAGCombinerInfo &DCI,
5311+
const LoongArchSubtarget &Subtarget) {
5312+
SDValue LHS = N->getOperand(1);
5313+
SDValue RHS = N->getOperand(2);
5314+
SDValue CC = N->getOperand(3);
5315+
SDLoc DL(N);
5316+
5317+
// combine_CC takes LHS/RHS/CC by reference and updates them on success.
if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5318+
return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5319+
N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5320+
5321+
return SDValue();
5322+
}
5323+
52275324
template <unsigned N>
52285325
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
52295326
SelectionDAG &DAG,
@@ -5916,6 +6013,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
59166013
return performBITCASTCombine(N, DAG, DCI, Subtarget);
59176014
case LoongArchISD::BITREV_W:
59186015
return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6016+
case LoongArchISD::BR_CC:
6017+
return performBR_CCCombine(N, DAG, DCI, Subtarget);
59196018
case ISD::INTRINSIC_WO_CHAIN:
59206019
return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
59216020
case LoongArchISD::MOVGR2FR_W_LA64:
@@ -6645,6 +6744,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
66456744
NODE_NAME_CASE(TAIL_MEDIUM)
66466745
NODE_NAME_CASE(TAIL_LARGE)
66476746
NODE_NAME_CASE(SELECT_CC)
6747+
NODE_NAME_CASE(BR_CC)
6748+
NODE_NAME_CASE(BRCOND)
66486749
NODE_NAME_CASE(SLL_W)
66496750
NODE_NAME_CASE(SRA_W)
66506751
NODE_NAME_CASE(SRL_W)

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ enum NodeType : unsigned {
3737
// Select
3838
SELECT_CC,
3939

40+
// Branch
41+
BR_CC,
42+
BRCOND,
43+
4044
// 32-bit shifts, directly matching the semantics of the named LoongArch
4145
// instructions.
4246
SLL_W,
@@ -385,6 +389,7 @@ class LoongArchTargetLowering : public TargetLowering {
385389
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
386390
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
387391
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
392+
SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
388393
SDValue lowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
389394
SDValue lowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) const;
390395
SDValue lowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 25 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ def SDT_LoongArchSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
3131
SDTCisSameAs<0, 4>,
3232
SDTCisSameAs<4, 5>]>;
3333

34+
// Type profile for the LoongArch BR_CC node: no results and four operands.
// Operands 0 and 1 must have the same type; operands 2 and 3 are OtherVT.
def SDT_LoongArchBrCC : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
35+
SDTCisVT<2, OtherVT>,
36+
SDTCisVT<3, OtherVT>]>;
37+
3438
def SDT_LoongArchBStrIns: SDTypeProfile<1, 4, [
3539
SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>,
3640
SDTCisSameAs<3, 4>
@@ -94,6 +98,8 @@ def loongarch_tail_large : SDNode<"LoongArchISD::TAIL_LARGE", SDT_LoongArchCall,
9498
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
9599
SDNPVariadic]>;
96100
def loongarch_selectcc : SDNode<"LoongArchISD::SELECT_CC", SDT_LoongArchSelectCC>;
101+
def loongarch_brcc : SDNode<"LoongArchISD::BR_CC", SDT_LoongArchBrCC,
102+
[SDNPHasChain]>;
97103
def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
98104
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
99105
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
@@ -1537,47 +1543,29 @@ def : Pat<(select GPR:$cond, GPR:$t, GPR:$f),
15371543

15381544
/// Branches and jumps
15391545

1540-
class BccPat<PatFrag CondOp, LAInst Inst>
1541-
: Pat<(brcond (GRLenVT (CondOp GPR:$rj, GPR:$rd)), bb:$imm16),
1542-
(Inst GPR:$rj, GPR:$rd, bb:$imm16)>;
1543-
1544-
def : BccPat<seteq, BEQ>;
1545-
def : BccPat<setne, BNE>;
1546-
def : BccPat<setlt, BLT>;
1547-
def : BccPat<setge, BGE>;
1548-
def : BccPat<setult, BLTU>;
1549-
def : BccPat<setuge, BGEU>;
1550-
1551-
class BccSwapPat<PatFrag CondOp, LAInst InstBcc>
1552-
: Pat<(brcond (GRLenVT (CondOp GPR:$rd, GPR:$rj)), bb:$imm16),
1553-
(InstBcc GPR:$rj, GPR:$rd, bb:$imm16)>;
1554-
1555-
// Condition codes that don't have matching LoongArch branch instructions, but
1556-
// are trivially supported by swapping the two input operands.
1557-
def : BccSwapPat<setgt, BLT>;
1558-
def : BccSwapPat<setle, BGE>;
1559-
def : BccSwapPat<setugt, BLTU>;
1560-
def : BccSwapPat<setule, BGEU>;
1561-
15621546
let Predicates = [Has32S] in {
1563-
// An extra pattern is needed for a brcond without a setcc (i.e. where the
1564-
// condition was calculated elsewhere).
1565-
def : Pat<(brcond GPR:$rj, bb:$imm21), (BNEZ GPR:$rj, bb:$imm21)>;
1566-
1567-
def : Pat<(brcond (GRLenVT (seteq GPR:$rj, 0)), bb:$imm21),
1568-
(BEQZ GPR:$rj, bb:$imm21)>;
1569-
def : Pat<(brcond (GRLenVT (setne GPR:$rj, 0)), bb:$imm21),
1570-
(BNEZ GPR:$rj, bb:$imm21)>;
1547+
// Selects a loongarch_brcc that compares a GRLenVT register against the
// constant 0 under condition Cond to the instruction Inst, which takes only
// the register and the branch target.
class BccZeroPat<CondCode Cond, LAInst Inst>
1548+
: Pat<(loongarch_brcc (GRLenVT GPR:$rj), 0, Cond, bb:$imm21),
1549+
(Inst GPR:$rj, bb:$imm21)>;
1550+
1551+
def : BccZeroPat<SETEQ, BEQZ>;
1552+
def : BccZeroPat<SETNE, BNEZ>;
15711553
} // Predicates = [Has32S]
15721554

1573-
// An extra pattern is needed for a brcond without a setcc (i.e. where the
1574-
// condition was calculated elsewhere).
1575-
def : Pat<(brcond GPR:$rj, bb:$imm16), (BNE GPR:$rj, R0, bb:$imm16)>;
1555+
// Selection patterns mapping loongarch_brcc with condition Cond to the
// two-register branch instruction Inst. Two patterns are emitted: one for a
// register/register comparison and one for a comparison against constant 0.
multiclass BccPat<CondCode Cond, LAInst Inst> {
1556+
def : Pat<(loongarch_brcc (GRLenVT GPR:$rj), GPR:$rd, Cond, bb:$imm16),
1557+
(Inst GPR:$rj, GPR:$rd, bb:$imm16)>;
1558+
// Explicitly select 0 to R0. The register coalescer doesn't always do it.
1559+
def : Pat<(loongarch_brcc (GRLenVT GPR:$rj), 0, Cond, bb:$imm16),
1560+
(Inst GPR:$rj, (GRLenVT R0), bb:$imm16)>;
1561+
}
15761562

1577-
def : Pat<(brcond (GRLenVT (seteq GPR:$rj, 0)), bb:$imm16),
1578-
(BEQ GPR:$rj, R0, bb:$imm16)>;
1579-
def : Pat<(brcond (GRLenVT (setne GPR:$rj, 0)), bb:$imm16),
1580-
(BNE GPR:$rj, R0, bb:$imm16)>;
1563+
defm : BccPat<SETEQ, BEQ>;
1564+
defm : BccPat<SETNE, BNE>;
1565+
defm : BccPat<SETLT, BLT>;
1566+
defm : BccPat<SETGE, BGE>;
1567+
defm : BccPat<SETULT, BLTU>;
1568+
defm : BccPat<SETUGE, BGEU>;
15811569

15821570
let isBarrier = 1, isBranch = 1, isTerminator = 1 in
15831571
def PseudoBR : Pseudo<(outs), (ins simm26_b:$imm26), [(br bb:$imm26)]>,

0 commit comments

Comments
 (0)