Skip to content

Commit 709e4ad

Browse files
committed
[LoongArch] Add codegen support for the bitwise binary operations and part of other operations
Reference: https://llvm.org/docs/LangRef.html#bitwise-binary-operations https://llvm.org/docs/LangRef.html#other-operations The reason why other operations are implemented here is that some bitwise binary operations depend on them. For example, on loongarch32, `shl` over i64 data requires `select`. Differential Revision: https://reviews.llvm.org/D127203
1 parent efc7005 commit 709e4ad

28 files changed

+4811
-6
lines changed

llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,4 +128,50 @@ def : PatFprFpr<fmul, FMUL_S, FPR32>;
128128
def : PatFprFpr<fdiv, FDIV_S, FPR32>;
129129
def : PatFpr<fneg, FNEG_S, FPR32>;
130130

131+
/// Setcc
132+
133+
// Match non-signaling comparison
134+
135+
// TODO: Change setcc to any_fsetcc after call is supported because
136+
// we need to call llvm.experimental.constrained.fcmp.f32 in testcase.
137+
// See RISCV float-fcmp-strict.ll for reference.
138+
// Pattern template for a floating-point comparison: a `setcc` of two RegTy
// operands ($fj, $fk) under condition code `cc` is selected as CmpInst on the
// same operands, and the comparison result is wrapped in MOVCF2GR.
class PatFPSetcc<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
139+
: Pat<(setcc RegTy:$fj, RegTy:$fk, cc),
140+
(MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
141+
// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE.
142+
// FPR32 instantiations: one FCMP_C*_S instruction per remaining condition code.
def : PatFPSetcc<SETOEQ, FCMP_CEQ_S, FPR32>;
143+
def : PatFPSetcc<SETOLT, FCMP_CLT_S, FPR32>;
144+
def : PatFPSetcc<SETOLE, FCMP_CLE_S, FPR32>;
145+
def : PatFPSetcc<SETONE, FCMP_CNE_S, FPR32>;
146+
def : PatFPSetcc<SETO, FCMP_COR_S, FPR32>;
147+
def : PatFPSetcc<SETUEQ, FCMP_CUEQ_S, FPR32>;
148+
def : PatFPSetcc<SETULT, FCMP_CULT_S, FPR32>;
149+
def : PatFPSetcc<SETULE, FCMP_CULE_S, FPR32>;
150+
def : PatFPSetcc<SETUNE, FCMP_CUNE_S, FPR32>;
151+
def : PatFPSetcc<SETUO, FCMP_CUN_S, FPR32>;
152+
153+
// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions.
154+
155+
/// Select
156+
157+
// Select between two FPR32 values on a GPR condition. Note the operand order:
// the pattern's true value is $fk and its false value is $fj, and they are
// handed to FSEL_S as ($fj, $fk, cond), with the GPR condition first passed
// through MOVGR2CF.
def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj),
158+
(FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>;
159+
160+
/// Selectcc
161+
162+
// Pattern template that folds a floating-point comparison into a select:
// `select (setcc $a, $b, cc), $t, $f` is selected as SelInst with operands
// ($f, $t, CmpInst($a, $b)), so the CmpInst result feeds SelInst directly.
// The false value $f comes first in the SelInst operand list.
class PatFPSelectcc<CondCode cc, LAInst CmpInst, LAInst SelInst,
163+
RegisterClass RegTy>
164+
: Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f),
165+
(SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>;
166+
// FPR32 instantiations: each condition code pairs its FCMP_C*_S with FSEL_S.
def : PatFPSelectcc<SETOEQ, FCMP_CEQ_S, FSEL_S, FPR32>;
167+
def : PatFPSelectcc<SETOLT, FCMP_CLT_S, FSEL_S, FPR32>;
168+
def : PatFPSelectcc<SETOLE, FCMP_CLE_S, FSEL_S, FPR32>;
169+
def : PatFPSelectcc<SETONE, FCMP_CNE_S, FSEL_S, FPR32>;
170+
def : PatFPSelectcc<SETO, FCMP_COR_S, FSEL_S, FPR32>;
171+
def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_S, FSEL_S, FPR32>;
172+
def : PatFPSelectcc<SETULT, FCMP_CULT_S, FSEL_S, FPR32>;
173+
def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_S, FPR32>;
174+
def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_S, FPR32>;
175+
def : PatFPSelectcc<SETUO, FCMP_CUN_S, FSEL_S, FPR32>;
176+
131177
} // Predicates = [HasBasicF]

llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,4 +145,44 @@ def : PatFprFpr<fmul, FMUL_D, FPR64>;
145145
def : PatFprFpr<fdiv, FDIV_D, FPR64>;
146146
def : PatFpr<fneg, FNEG_D, FPR64>;
147147

148+
/// Setcc
149+
150+
// Match non-signaling comparison
151+
152+
// TODO: Change setcc to any_fsetcc after call is supported because
153+
// we need to call llvm.experimental.constrained.fcmp.f64 in testcase.
154+
// See RISCV float-fcmp-strict.ll for reference.
155+
156+
// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE.
156+
// FPR64 instantiations of PatFPSetcc: one FCMP_C*_D instruction per remaining
// condition code.
def : PatFPSetcc<SETOEQ, FCMP_CEQ_D, FPR64>;
157+
def : PatFPSetcc<SETOLT, FCMP_CLT_D, FPR64>;
158+
def : PatFPSetcc<SETOLE, FCMP_CLE_D, FPR64>;
159+
def : PatFPSetcc<SETONE, FCMP_CNE_D, FPR64>;
160+
def : PatFPSetcc<SETO, FCMP_COR_D, FPR64>;
161+
def : PatFPSetcc<SETUEQ, FCMP_CUEQ_D, FPR64>;
162+
def : PatFPSetcc<SETULT, FCMP_CULT_D, FPR64>;
163+
def : PatFPSetcc<SETULE, FCMP_CULE_D, FPR64>;
164+
def : PatFPSetcc<SETUNE, FCMP_CUNE_D, FPR64>;
165+
def : PatFPSetcc<SETUO, FCMP_CUN_D, FPR64>;
167+
168+
// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions.
169+
170+
/// Select
171+
172+
// Select between two FPR64 values on a GPR condition. Note the operand order:
// the pattern's true value is $fk and its false value is $fj, and they are
// handed to FSEL_D as ($fj, $fk, cond), with the GPR condition first passed
// through MOVGR2CF.
def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj),
173+
(FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>;
174+
175+
/// Selectcc
176+
177+
// FPR64 instantiations of PatFPSelectcc: each condition code pairs its
// FCMP_C*_D comparison with FSEL_D.
def : PatFPSelectcc<SETOEQ, FCMP_CEQ_D, FSEL_D, FPR64>;
178+
def : PatFPSelectcc<SETOLT, FCMP_CLT_D, FSEL_D, FPR64>;
179+
def : PatFPSelectcc<SETOLE, FCMP_CLE_D, FSEL_D, FPR64>;
180+
def : PatFPSelectcc<SETONE, FCMP_CNE_D, FSEL_D, FPR64>;
181+
def : PatFPSelectcc<SETO, FCMP_COR_D, FSEL_D, FPR64>;
182+
def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_D, FSEL_D, FPR64>;
183+
def : PatFPSelectcc<SETULT, FCMP_CULT_D, FSEL_D, FPR64>;
184+
def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_D, FPR64>;
185+
def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_D, FPR64>;
186+
def : PatFPSelectcc<SETUO, FCMP_CUN_D, FSEL_D, FPR64>;
187+
148188
} // Predicates = [HasBasicD]

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "LoongArchISelDAGToDAG.h"
1414
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
1515
#include "MCTargetDesc/LoongArchMatInt.h"
16+
#include "llvm/Support/KnownBits.h"
1617

1718
using namespace llvm;
1819

@@ -64,6 +65,54 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
6465
// Select the default instruction.
6566
SelectCode(Node);
6667
}
68+
69+
/// Pick the operand a shift should use as its amount, peeling off work that
/// the hardware makes redundant.
///
/// LoongArch shift instructions only read the low 5 or 6 bits of the shift
/// amount, so an AND that leaves those bits untouched can be skipped, and a
/// subtraction from a non-zero multiple of the shift width can be replaced by
/// a plain negation.
///
/// \param N          the shift-amount value to inspect.
/// \param ShiftWidth the shift width in bits; asserted to be a power of two
///                   when the AND form is examined.
/// \param ShAmt      receives the value the shift should use as its amount.
/// \returns true in every case; if nothing can be simplified \p ShAmt is \p N.
bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                            SDValue &ShAmt) {
  const unsigned Opc = N.getOpcode();

  if (Opc == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    const APInt &Mask = N->getConstantOperandAPInt(1);

    // ShiftWidth is a power of two, so ShiftWidth - 1 has exactly the bits
    // required to encode every possible shift amount.
    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
    const APInt LiveBits(Mask.getBitWidth(), ShiftWidth - 1);

    // The AND is a no-op for the shift if it preserves every bit the shift
    // reads ...
    bool MaskIsNoOp = LiveBits.isSubsetOf(Mask);
    if (!MaskIsNoOp) {
      // ... or if the only bits it clears are already known to be zero
      // (SimplifyDemandedBits may have shrunk the mask to exactly that).
      KnownBits SrcBits = CurDAG->computeKnownBits(N->getOperand(0));
      MaskIsNoOp = LiveBits.isSubsetOf(Mask | SrcBits.Zero);
    }

    if (MaskIsNoOp) {
      ShAmt = N.getOperand(0);
      return true;
    }
  } else if (Opc == ISD::SUB && isa<ConstantSDNode>(N.getOperand(0))) {
    const uint64_t Minuend = N.getConstantOperandVal(0);

    // Shifting by C - X with C a non-zero multiple of the shift width is the
    // same as shifting by -X, so emit a NEG (SUB from R0) instead of a SUB
    // from a constant.
    const bool IsWidthMultiple = Minuend != 0 && Minuend % ShiftWidth == 0;
    if (IsWidthMultiple) {
      SDLoc Loc(N);
      EVT AmtVT = N.getValueType();
      SDValue ZeroReg = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), Loc,
                                               LoongArch::R0, AmtVT);
      unsigned SubOpc =
          AmtVT == MVT::i64 ? LoongArch::SUB_D : LoongArch::SUB_W;
      MachineSDNode *Negated = CurDAG->getMachineNode(SubOpc, Loc, AmtVT,
                                                      ZeroReg, N.getOperand(1));
      ShAmt = SDValue(Negated, 0);
      return true;
    }
  }

  // Nothing to strip: use the amount as given.
  ShAmt = N;
  return true;
}
115+
67116
// This pass converts a legalized DAG into a LoongArch-specific DAG, ready
68117
// for instruction scheduling.
69118
FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) {

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
3838

3939
void Select(SDNode *Node) override;
4040

41+
// Chooses the value to use as a shift amount for a shift of width ShiftWidth,
// storing it in ShAmt; the out-of-line definition always returns true.
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
42+
// Shift-amount matcher that uses the subtarget's GRLen as the shift width.
bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) {
  const unsigned GRLen = Subtarget->getGRLen();
  return selectShiftMask(N, GRLen, ShAmt);
}
45+
// Shift-amount matcher with a fixed shift width of 32 bits.
bool selectShiftMask32(SDValue N, SDValue &ShAmt) {
  const unsigned Width32 = 32;
  return selectShiftMask(N, Width32, ShAmt);
}
48+
4149
// Include the pieces autogenerated from the target description.
4250
#include "LoongArchGenDAGISel.inc"
4351
};

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "LoongArchSubtarget.h"
1919
#include "LoongArchTargetMachine.h"
2020
#include "llvm/ADT/Statistic.h"
21+
#include "llvm/CodeGen/ISDOpcodes.h"
2122
#include "llvm/Support/Debug.h"
2223

2324
using namespace llvm;
@@ -37,6 +38,29 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
3738
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
3839

3940
// TODO: add necessary setOperationAction calls later.
41+
setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
42+
setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
43+
setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
44+
45+
if (Subtarget.is64Bit()) {
46+
setOperationAction(ISD::SHL, MVT::i32, Custom);
47+
setOperationAction(ISD::SRA, MVT::i32, Custom);
48+
setOperationAction(ISD::SRL, MVT::i32, Custom);
49+
}
50+
51+
static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
52+
ISD::SETUGT, ISD::SETUGE};
53+
54+
if (Subtarget.hasBasicF()) {
55+
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
56+
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
57+
}
58+
if (Subtarget.hasBasicD()) {
59+
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
60+
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
61+
}
62+
63+
setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
4064

4165
// Compute derived properties from the register classes.
4266
computeRegisterProperties(STI.getRegisterInfo());
@@ -50,6 +74,169 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
5074
setMinFunctionAlignment(FunctionAlignment);
5175
}
5276

77+
/// Dispatch custom lowering for \p Op.
///
/// The *_PARTS shifts are expanded by the dedicated helpers. Plain
/// SHL/SRA/SRL return an empty SDValue. Any other opcode is a fatal error.
SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  const unsigned Opc = Op.getOpcode();

  switch (Opc) {
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);

  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/true);

  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/false);

  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    // Reached for an i32 shift amount that still has to be promoted; there
    // is nothing to do here, so hand back an empty value.
    assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    return SDValue();

  default:
    break;
  }

  report_fatal_error("unimplemented operand");
}
97+
98+
/// Expand SHL_PARTS: a left shift of a double-width value held as a (Lo, Hi)
/// pair of GRLen-wide halves, by the amount in operand 2.
///
/// Both outcomes are computed and then picked with SELECT:
///
///   if Shamt - GRLen < 0:            // i.e. Shamt < GRLen
///     Lo = Lo << Shamt
///     Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
///   else:
///     Lo = 0
///     Hi = Lo << (Shamt - GRLen)
///
/// \returns a MERGE_VALUES node carrying {Lo, Hi}.
SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  SDValue InLo = Op.getOperand(0);
  SDValue InHi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT HalfVT = InLo.getValueType();

  // Constants and the two derived shift amounts.
  SDValue ConstZero = DAG.getConstant(0, Loc, HalfVT);
  SDValue ConstOne = DAG.getConstant(1, Loc, HalfVT);
  SDValue NegGRLen =
      DAG.getConstant(-static_cast<int>(Subtarget.getGRLen()), Loc, HalfVT);
  SDValue GRLenLess1 = DAG.getConstant(Subtarget.getGRLen() - 1, Loc, HalfVT);
  SDValue AmtLessGRLen = DAG.getNode(ISD::ADD, Loc, HalfVT, Amt, NegGRLen);
  SDValue AmtXorGRLenLess1 =
      DAG.getNode(ISD::XOR, Loc, HalfVT, Amt, GRLenLess1);

  // Shamt < GRLen: bits cross from Lo into Hi.
  SDValue LoIfShort = DAG.getNode(ISD::SHL, Loc, HalfVT, InLo, Amt);
  SDValue LoSrl1 = DAG.getNode(ISD::SRL, Loc, HalfVT, InLo, ConstOne);
  SDValue LoCarry =
      DAG.getNode(ISD::SRL, Loc, HalfVT, LoSrl1, AmtXorGRLenLess1);
  SDValue HiShifted = DAG.getNode(ISD::SHL, Loc, HalfVT, InHi, Amt);
  SDValue HiIfShort = DAG.getNode(ISD::OR, Loc, HalfVT, HiShifted, LoCarry);

  // Shamt >= GRLen: Hi is taken entirely from Lo and Lo becomes zero.
  SDValue HiIfLong = DAG.getNode(ISD::SHL, Loc, HalfVT, InLo, AmtLessGRLen);

  SDValue IsShort =
      DAG.getSetCC(Loc, HalfVT, AmtLessGRLen, ConstZero, ISD::SETLT);

  SDValue OutLo =
      DAG.getNode(ISD::SELECT, Loc, HalfVT, IsShort, LoIfShort, ConstZero);
  SDValue OutHi =
      DAG.getNode(ISD::SELECT, Loc, HalfVT, IsShort, HiIfShort, HiIfLong);

  SDValue Halves[2] = {OutLo, OutHi};
  return DAG.getMergeValues(Halves, Loc);
}
136+
137+
/// Expand SRA_PARTS / SRL_PARTS: a right shift of a double-width value held
/// as a (Lo, Hi) pair of GRLen-wide halves, by the amount in operand 2.
///
/// \param IsSRA true for an arithmetic shift, false for a logical one.
///
/// Both outcomes are computed and then picked with SELECT:
///
///   SRA expansion:
///     if Shamt - GRLen < 0:          // i.e. Shamt < GRLen
///       Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
///       Hi = Hi >>s Shamt
///     else:
///       Lo = Hi >>s (Shamt - GRLen)
///       Hi = Hi >>s (GRLen - 1)
///
///   SRL expansion:
///     if Shamt - GRLen < 0:          // i.e. Shamt < GRLen
///       Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
///       Hi = Hi >>u Shamt
///     else:
///       Lo = Hi >>u (Shamt - GRLen)
///       Hi = 0
///
/// \returns a MERGE_VALUES node carrying {Lo, Hi}.
SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc Loc(Op);
  SDValue InLo = Op.getOperand(0);
  SDValue InHi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT HalfVT = InLo.getValueType();

  // The opcode used wherever the high half is shifted right.
  unsigned HiShiftOpc = ISD::SRL;
  if (IsSRA)
    HiShiftOpc = ISD::SRA;

  // Constants and the two derived shift amounts.
  SDValue ConstZero = DAG.getConstant(0, Loc, HalfVT);
  SDValue ConstOne = DAG.getConstant(1, Loc, HalfVT);
  SDValue NegGRLen =
      DAG.getConstant(-static_cast<int>(Subtarget.getGRLen()), Loc, HalfVT);
  SDValue GRLenLess1 = DAG.getConstant(Subtarget.getGRLen() - 1, Loc, HalfVT);
  SDValue AmtLessGRLen = DAG.getNode(ISD::ADD, Loc, HalfVT, Amt, NegGRLen);
  SDValue AmtXorGRLenLess1 =
      DAG.getNode(ISD::XOR, Loc, HalfVT, Amt, GRLenLess1);

  // Shamt < GRLen: bits cross from Hi into Lo.
  SDValue LoShifted = DAG.getNode(ISD::SRL, Loc, HalfVT, InLo, Amt);
  SDValue HiShl1 = DAG.getNode(ISD::SHL, Loc, HalfVT, InHi, ConstOne);
  SDValue HiCarry =
      DAG.getNode(ISD::SHL, Loc, HalfVT, HiShl1, AmtXorGRLenLess1);
  SDValue LoIfShort = DAG.getNode(ISD::OR, Loc, HalfVT, LoShifted, HiCarry);
  SDValue HiIfShort = DAG.getNode(HiShiftOpc, Loc, HalfVT, InHi, Amt);

  // Shamt >= GRLen: Lo is taken entirely from Hi; Hi becomes the sign fill
  // (SRA) or zero (SRL).
  SDValue LoIfLong = DAG.getNode(HiShiftOpc, Loc, HalfVT, InHi, AmtLessGRLen);
  SDValue HiIfLong = ConstZero;
  if (IsSRA)
    HiIfLong = DAG.getNode(ISD::SRA, Loc, HalfVT, InHi, GRLenLess1);

  SDValue IsShort =
      DAG.getSetCC(Loc, HalfVT, AmtLessGRLen, ConstZero, ISD::SETLT);

  SDValue OutLo =
      DAG.getNode(ISD::SELECT, Loc, HalfVT, IsShort, LoIfShort, LoIfLong);
  SDValue OutHi =
      DAG.getNode(ISD::SELECT, Loc, HalfVT, IsShort, HiIfShort, HiIfLong);

  SDValue Halves[2] = {OutLo, OutHi};
  return DAG.getMergeValues(Halves, Loc);
}
189+
190+
// Returns the opcode of the target-specific SDNode that implements the 32-bit
191+
// form of the given Opcode.
192+
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
193+
switch (Opcode) {
194+
default:
195+
llvm_unreachable("Unexpected opcode");
196+
case ISD::SHL:
197+
return LoongArchISD::SLL_W;
198+
case ISD::SRA:
199+
return LoongArchISD::SRA_W;
200+
case ISD::SRL:
201+
return LoongArchISD::SRL_W;
202+
}
203+
}
204+
205+
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
206+
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
207+
// otherwise be promoted to i64, making it difficult to select the
208+
// SLL_W/.../*W later one because the fact the operation was originally of
209+
// type i8/i16/i32 is lost.
210+
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
211+
unsigned ExtOpc = ISD::ANY_EXTEND) {
212+
SDLoc DL(N);
213+
LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
214+
SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
215+
SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
216+
SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
217+
// ReplaceNodeResults requires we maintain the same type for the return value.
218+
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
219+
}
220+
221+
/// Custom result-type legalisation for \p N.
///
/// Only i32 SHL/SRA/SRL on a 64-bit subtarget are handled: a shift whose
/// amount is not an ISD::Constant is rewritten through customLegalizeToWOp and
/// appended to \p Results. A constant-amount shift adds nothing to \p Results.
/// Any other opcode is unreachable.
void LoongArchTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    const bool AmountIsConstant =
        N->getOperand(1).getOpcode() == ISD::Constant;
    if (!AmountIsConstant)
      Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  }
  default:
    llvm_unreachable("Don't know how to legalize this operation");
  }
}
239+
53240
const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
54241
switch ((LoongArchISD::NodeType)Opcode) {
55242
case LoongArchISD::FIRST_NUMBER:
@@ -61,6 +248,9 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
61248

62249
// TODO: Add more target-dependent nodes later.
63250
NODE_NAME_CASE(RET)
251+
NODE_NAME_CASE(SLL_W)
252+
NODE_NAME_CASE(SRA_W)
253+
NODE_NAME_CASE(SRL_W)
64254
}
65255
#undef NODE_NAME_CASE
66256
return nullptr;

0 commit comments

Comments
 (0)