Skip to content

Commit 8b5e295

Browse files
committed
nfc cleanup 3
1 parent 543e3cf commit 8b5e295

File tree

5 files changed

+109
-168
lines changed

5 files changed

+109
-168
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 70 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,64 @@ pickOpcodeForVT(MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i16,
10271027
}
10281028
}
10291029

1030+
static inline bool isAddLike(const SDValue V) {
1031+
return V.getOpcode() == ISD::ADD ||
1032+
(V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
1033+
}
1034+
1035+
// selectBaseADDR - Match a dag node which will serve as the base address for an
1036+
// ADDR operand pair.
1037+
static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
1038+
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
1039+
return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
1040+
GA->getValueType(0), GA->getOffset(),
1041+
GA->getTargetFlags());
1042+
if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
1043+
return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
1044+
ES->getTargetFlags());
1045+
if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
1046+
return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
1047+
1048+
return N;
1049+
}
1050+
1051+
static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
1052+
APInt AccumulatedOffset(64u, 0);
1053+
while (isAddLike(Addr)) {
1054+
const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1055+
if (!CN)
1056+
break;
1057+
1058+
const APInt CI = CN->getAPIntValue().sext(64);
1059+
if (!(CI + AccumulatedOffset).isSignedIntN(32))
1060+
break;
1061+
1062+
AccumulatedOffset += CI;
1063+
Addr = Addr->getOperand(0);
1064+
}
1065+
return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
1066+
MVT::i32);
1067+
}
1068+
1069+
static std::pair<SDValue, SDValue> selectADDR(SDValue Addr, SelectionDAG *DAG) {
1070+
SDValue Offset = accumulateOffset(Addr, SDLoc(Addr), DAG);
1071+
SDValue Base = selectBaseADDR(Addr, DAG);
1072+
return {Base, Offset};
1073+
}
1074+
1075+
// Select a pair of operands which represent a valid PTX address, this could be
1076+
// one of the following things:
1077+
// - [var] - Offset is simply set to 0
1078+
// - [reg] - Offset is simply set to 0
1079+
// - [reg+immOff]
1080+
// - [var+immOff]
1081+
// Note that immOff must fit into a 32-bit signed integer.
1082+
bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
1083+
SDValue &Offset) {
1084+
std::tie(Base, Offset) = selectADDR(Addr, CurDAG);
1085+
return true;
1086+
}
1087+
10301088
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
10311089
MemSDNode *LD = cast<MemSDNode>(N);
10321090
assert(LD->readMem() && "Expected load");
@@ -1062,8 +1120,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
10621120
FromTypeWidth <= 128 && "Invalid width for load");
10631121

10641122
// Create the machine instruction DAG
1065-
SDValue Offset, Base;
1066-
SelectADDR(N->getOperand(1), Base, Offset);
1123+
const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
10671124
SDValue Ops[] = {getI32Imm(Ordering, DL),
10681125
getI32Imm(Scope, DL),
10691126
getI32Imm(CodeAddrSpace, DL),
@@ -1144,8 +1201,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
11441201
assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
11451202
FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load");
11461203

1147-
SDValue Offset, Base;
1148-
SelectADDR(N->getOperand(1), Base, Offset);
1204+
const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
11491205
SDValue Ops[] = {getI32Imm(Ordering, DL),
11501206
getI32Imm(Scope, DL),
11511207
getI32Imm(CodeAddrSpace, DL),
@@ -1213,8 +1269,7 @@ bool NVPTXDAGToDAGISel::tryLDG(MemSDNode *LD) {
12131269
assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
12141270
FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load");
12151271

1216-
SDValue Base, Offset;
1217-
SelectADDR(LD->getOperand(1), Base, Offset);
1272+
const auto [Base, Offset] = selectADDR(LD->getOperand(1), CurDAG);
12181273
SDValue Ops[] = {getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base,
12191274
Offset, LD->getChain()};
12201275

@@ -1278,8 +1333,7 @@ bool NVPTXDAGToDAGISel::tryLDU(SDNode *N) {
12781333
SDValue Addr =
12791334
LD->getOperand(LD->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 2 : 1);
12801335

1281-
SDValue Base, Offset;
1282-
SelectADDR(Addr, Base, Offset);
1336+
const auto [Base, Offset] = selectADDR(Addr, CurDAG);
12831337
SDValue Ops[] = {getI32Imm(FromTypeWidth, DL), Base, Offset, LD->getChain()};
12841338

12851339
std::optional<unsigned> Opcode;
@@ -1339,9 +1393,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
13391393
assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
13401394
"Invalid width for store");
13411395

1342-
SDValue Offset, Base;
1343-
SelectADDR(ST->getBasePtr(), Base, Offset);
1344-
1396+
const auto [Base, Offset] = selectADDR(ST->getBasePtr(), CurDAG);
13451397
SDValue Ops[] = {selectPossiblyImm(Value),
13461398
getI32Imm(Ordering, DL),
13471399
getI32Imm(Scope, DL),
@@ -1399,9 +1451,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
13991451
assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
14001452
TotalWidth <= 256 && "Invalid width for store");
14011453

1402-
SDValue Offset, Base;
1403-
SelectADDR(Addr, Base, Offset);
1404-
1454+
const auto [Base, Offset] = selectADDR(Addr, CurDAG);
14051455
Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
14061456
getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base,
14071457
Offset, Chain});
@@ -1708,59 +1758,6 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
17081758
return true;
17091759
}
17101760

1711-
static inline bool isAddLike(const SDValue V) {
1712-
return V.getOpcode() == ISD::ADD ||
1713-
(V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
1714-
}
1715-
1716-
// selectBaseADDR - Match a dag node which will serve as the base address for an
1717-
// ADDR operand pair.
1718-
static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
1719-
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
1720-
return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
1721-
GA->getValueType(0), GA->getOffset(),
1722-
GA->getTargetFlags());
1723-
if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
1724-
return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
1725-
ES->getTargetFlags());
1726-
if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
1727-
return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
1728-
1729-
return N;
1730-
}
1731-
1732-
static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
1733-
APInt AccumulatedOffset(64u, 0);
1734-
while (isAddLike(Addr)) {
1735-
const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1736-
if (!CN)
1737-
break;
1738-
1739-
const APInt CI = CN->getAPIntValue().sext(64);
1740-
if (!(CI + AccumulatedOffset).isSignedIntN(32))
1741-
break;
1742-
1743-
AccumulatedOffset += CI;
1744-
Addr = Addr->getOperand(0);
1745-
}
1746-
return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
1747-
MVT::i32);
1748-
}
1749-
1750-
// Select a pair of operands which represent a valid PTX address, this could be
1751-
// one of the following things:
1752-
// - [var] - Offset is simply set to 0
1753-
// - [reg] - Offset is simply set to 0
1754-
// - [reg+immOff]
1755-
// - [var+immOff]
1756-
// Note that immOff must fit into a 32-bit signed integer.
1757-
bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
1758-
SDValue &Offset) {
1759-
Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG);
1760-
Base = selectBaseADDR(Addr, CurDAG);
1761-
return true;
1762-
}
1763-
17641761
SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
17651762
if (V.getOpcode() == ISD::BITCAST)
17661763
V = V.getOperand(0);
@@ -1774,37 +1771,20 @@ SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
17741771
return V;
17751772
}
17761773

1777-
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
1778-
unsigned int spN) const {
1779-
const Value *Src = nullptr;
1780-
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
1781-
if (spN == 0 && mN->getMemOperand()->getPseudoValue())
1782-
return true;
1783-
Src = mN->getMemOperand()->getValue();
1784-
}
1785-
if (!Src)
1786-
return false;
1787-
if (auto *PT = dyn_cast<PointerType>(Src->getType()))
1788-
return (PT->getAddressSpace() == spN);
1789-
return false;
1790-
}
1791-
17921774
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
17931775
/// inline asm expressions.
17941776
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
17951777
const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
17961778
std::vector<SDValue> &OutOps) {
1797-
SDValue Op0, Op1;
17981779
switch (ConstraintID) {
17991780
default:
18001781
return true;
1801-
case InlineAsm::ConstraintCode::m: // memory
1802-
if (SelectADDR(Op, Op0, Op1)) {
1803-
OutOps.push_back(Op0);
1804-
OutOps.push_back(Op1);
1805-
return false;
1806-
}
1807-
break;
1782+
case InlineAsm::ConstraintCode::m: { // memory
1783+
const auto [Base, Offset] = selectADDR(Op, CurDAG);
1784+
OutOps.push_back(Base);
1785+
OutOps.push_back(Offset);
1786+
return false;
1787+
}
18081788
}
18091789
return true;
18101790
}

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
102102
SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);
103103
SDValue selectPossiblyImm(SDValue V);
104104

105-
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
106-
107105
// Returns the Memory Order and Scope that the PTX memory instruction should
108106
// use, and inserts appropriate fence instruction before the memory
109107
// instruction, if needed to implement the instructions memory order. Required

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,16 @@ class OneUse2<SDPatternOperator operator>
148148
: PatFrag<(ops node:$A, node:$B), (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>;
149149

150150

151-
class fpimm_pos_inf<ValueType vt>
152-
: FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
153-
154151
class zeroinitializer<ValueType vt> :
155152
PatLeaf<(vt (bitconvert (!cast<ValueType>("i" # vt.Size) 0)))>;
156153

157154

155+
def fpimm_pos_inf : FPImmLeaf<fAny, [{ return Imm.isPosInfinity(); }]>;
156+
def fpimm_0 : FPImmLeaf<fAny, [{ return Imm.isZero(); }]>;
157+
def fpimm_1 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(1.0); }]>;
158+
def fpimm_neg_1 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(-1.0); }]>;
159+
160+
158161
// Operands which can hold a Register or an Immediate.
159162
//
160163
// Unfortunately, since most register classes can hold multiple types, we must
@@ -761,10 +764,10 @@ def fabs_oneuse : OneUse1<fabs>;
761764

762765
def TESTINF_f32r : BasicNVPTXInst<(outs B1:$p), (ins B32:$a),
763766
"testp.infinite.f32",
764-
[(set i1:$p, (seteq (fabs_oneuse f32:$a), fpimm_pos_inf<f32>))]>;
767+
[(set i1:$p, (seteq (fabs_oneuse f32:$a), fpimm_pos_inf))]>;
765768
def TESTINF_f64r : BasicNVPTXInst<(outs B1:$p), (ins B64:$a),
766769
"testp.infinite.f64",
767-
[(set i1:$p, (seteq (fabs_oneuse f64:$a), fpimm_pos_inf<f64>))]>;
770+
[(set i1:$p, (seteq (fabs_oneuse f64:$a), fpimm_pos_inf))]>;
768771

769772
//-----------------------------------
770773
// Integer Arithmetic
@@ -905,22 +908,6 @@ let Predicates = [hasOptEnabled] in {
905908
// Floating Point Arithmetic
906909
//-----------------------------------
907910

908-
// Constant 1.0f
909-
def f32imm_1 : FPImmLeaf<f32, [{
910-
return &Imm.getSemantics() == &llvm::APFloat::IEEEsingle() &&
911-
Imm.convertToFloat() == 1.0f;
912-
}]>;
913-
// Constant 1.0 (double)
914-
def f64imm_1 : FPImmLeaf<f64, [{
915-
return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
916-
Imm.convertToDouble() == 1.0;
917-
}]>;
918-
// Constant -1.0 (double)
919-
def f64imm_neg1 : FPImmLeaf<f64, [{
920-
return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
921-
Imm.convertToDouble() == -1.0;
922-
}]>;
923-
924911
defm FADD : F3_fma_component<"add", fadd>;
925912
defm FSUB : F3_fma_component<"sub", fsub>;
926913
defm FMUL : F3_fma_component<"mul", fmul>;
@@ -994,7 +981,7 @@ def FRCP64r :
994981
BasicNVPTXInst<(outs B64:$dst),
995982
(ins B64:$b),
996983
"rcp.rn.f64",
997-
[(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
984+
[(set f64:$dst, (fdiv fpimm_1, f64:$b))]>;
998985
def FDIV64rr :
999986
BasicNVPTXInst<(outs B64:$dst),
1000987
(ins B64:$a, B64:$b),
@@ -1008,7 +995,7 @@ def FDIV64ri :
1008995

1009996
// fdiv will be converted to rcp
1010997
// fneg (fdiv 1.0, X) => fneg (rcp.rn X)
1011-
def : Pat<(fdiv f64imm_neg1, f64:$b),
998+
def : Pat<(fdiv fpimm_neg_1, f64:$b),
1012999
(FNEGf64 (FRCP64r $b))>;
10131000

10141001
//
@@ -1025,7 +1012,7 @@ def RCP_APPROX_F32_r :
10251012
BasicFlagsNVPTXInst<(outs B32:$dst),
10261013
(ins B32:$b), (ins FTZFlag:$ftz),
10271014
"rcp.approx$ftz.f32",
1028-
[(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
1015+
[(set f32:$dst, (fdiv_approx fpimm_1, f32:$b))]>;
10291016

10301017
//
10311018
// F32 Approximate division
@@ -1052,7 +1039,7 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
10521039
}]>;
10531040

10541041

1055-
def : Pat<(fdiv_full f32imm_1, f32:$b),
1042+
def : Pat<(fdiv_full fpimm_1, f32:$b),
10561043
(RCP_APPROX_F32_r $b)>;
10571044

10581045
//
@@ -1081,7 +1068,7 @@ def FRCP32r_prec :
10811068
BasicFlagsNVPTXInst<(outs B32:$dst),
10821069
(ins B32:$b), (ins FTZFlag:$ftz),
10831070
"rcp.rn$ftz.f32",
1084-
[(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>;
1071+
[(set f32:$dst, (fdiv_ftz fpimm_1, f32:$b))]>;
10851072
//
10861073
// F32 Accurate division
10871074
//
@@ -1096,7 +1083,7 @@ def FDIV32ri_prec :
10961083
"div.rn$ftz.f32",
10971084
[(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>;
10981085

1099-
def : Pat<(fdiv f32imm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
1086+
def : Pat<(fdiv fpimm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
11001087
def : Pat<(fdiv f32:$a, f32:$b), (FDIV32rr_prec $a, $b, NoFTZ)>;
11011088
def : Pat<(fdiv f32:$a, fpimm:$b), (FDIV32ri_prec $a, fpimm:$b, NoFTZ)>;
11021089

@@ -2418,10 +2405,6 @@ foreach scope = ["sys", "gpu", "cluster", "cta"] in {
24182405
def atomic_thread_fence_release_#scope: NVPTXFenceInst<scope, "release", hasPTX<87>>;
24192406
}
24202407

2421-
def fpimm_any_zero : FPImmLeaf<fAny, [{
2422-
return Imm.isZero();
2423-
}]>;
2424-
24252408
// Perform substitution if fma only has one use, and also if instruction has
24262409
// nnan instruction flag or if the TM has NoNaNsFPMath
24272410
def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
@@ -2443,11 +2426,11 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
24432426
[(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
24442427

24452428
let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
2446-
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_any_zero>;
2429+
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_0>;
24472430
def FMARELU_F16X2 : FMARELUInst<F16X2RT, true, zeroinitializer<v2f16>>;
24482431
}
24492432

24502433
let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in {
2451-
def FMARELU_BF16 : FMARELUInst<BF16RT, false, fpimm_any_zero>;
2434+
def FMARELU_BF16 : FMARELUInst<BF16RT, false, fpimm_0>;
24522435
def FMARELU_BF16X2 : FMARELUInst<BF16X2RT, false, zeroinitializer<v2bf16>>;
24532436
}

0 commit comments

Comments
 (0)