-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[NVPTX] miscellaneous minor cleanup (NFC) #152329
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] miscellaneous minor cleanup (NFC) #152329
Conversation
@llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) ChangesPatch is 82.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152329.diff 4 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 6068035b2ee47..75e8635ec892f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1027,6 +1027,64 @@ pickOpcodeForVT(MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i16,
}
}
+static inline bool isAddLike(const SDValue V) {
+ return V.getOpcode() == ISD::ADD ||
+ (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
+}
+
+// selectBaseADDR - Match a dag node which will serve as the base address for an
+// ADDR operand pair.
+static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
+ if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
+ return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
+ GA->getValueType(0), GA->getOffset(),
+ GA->getTargetFlags());
+ if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
+ return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
+ ES->getTargetFlags());
+ if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
+ return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
+
+ return N;
+}
+
+static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
+ APInt AccumulatedOffset(64u, 0);
+ while (isAddLike(Addr)) {
+ const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (!CN)
+ break;
+
+ const APInt CI = CN->getAPIntValue().sext(64);
+ if (!(CI + AccumulatedOffset).isSignedIntN(32))
+ break;
+
+ AccumulatedOffset += CI;
+ Addr = Addr->getOperand(0);
+ }
+ return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
+ MVT::i32);
+}
+
+static std::pair<SDValue, SDValue> selectADDR(SDValue Addr, SelectionDAG *DAG) {
+ SDValue Offset = accumulateOffset(Addr, SDLoc(Addr), DAG);
+ SDValue Base = selectBaseADDR(Addr, DAG);
+ return {Base, Offset};
+}
+
+// Select a pair of operands which represent a valid PTX address, this could be
+// one of the following things:
+// - [var] - Offset is simply set to 0
+// - [reg] - Offset is simply set to 0
+// - [reg+immOff]
+// - [var+immOff]
+// Note that immOff must fit into a 32-bit signed integer.
+bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ std::tie(Base, Offset) = selectADDR(Addr, CurDAG);
+ return true;
+}
+
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
MemSDNode *LD = cast<MemSDNode>(N);
assert(LD->readMem() && "Expected load");
@@ -1062,8 +1120,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
FromTypeWidth <= 128 && "Invalid width for load");
// Create the machine instruction DAG
- SDValue Offset, Base;
- SelectADDR(N->getOperand(1), Base, Offset);
+ const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
SDValue Ops[] = {getI32Imm(Ordering, DL),
getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
@@ -1144,8 +1201,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load");
- SDValue Offset, Base;
- SelectADDR(N->getOperand(1), Base, Offset);
+ const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
SDValue Ops[] = {getI32Imm(Ordering, DL),
getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
@@ -1213,8 +1269,7 @@ bool NVPTXDAGToDAGISel::tryLDG(MemSDNode *LD) {
assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load");
- SDValue Base, Offset;
- SelectADDR(LD->getOperand(1), Base, Offset);
+ const auto [Base, Offset] = selectADDR(LD->getOperand(1), CurDAG);
SDValue Ops[] = {getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base,
Offset, LD->getChain()};
@@ -1278,8 +1333,7 @@ bool NVPTXDAGToDAGISel::tryLDU(SDNode *N) {
SDValue Addr =
LD->getOperand(LD->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 2 : 1);
- SDValue Base, Offset;
- SelectADDR(Addr, Base, Offset);
+ const auto [Base, Offset] = selectADDR(Addr, CurDAG);
SDValue Ops[] = {getI32Imm(FromTypeWidth, DL), Base, Offset, LD->getChain()};
std::optional<unsigned> Opcode;
@@ -1339,9 +1393,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
"Invalid width for store");
- SDValue Offset, Base;
- SelectADDR(ST->getBasePtr(), Base, Offset);
-
+ const auto [Base, Offset] = selectADDR(ST->getBasePtr(), CurDAG);
SDValue Ops[] = {selectPossiblyImm(Value),
getI32Imm(Ordering, DL),
getI32Imm(Scope, DL),
@@ -1399,9 +1451,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
TotalWidth <= 256 && "Invalid width for store");
- SDValue Offset, Base;
- SelectADDR(Addr, Base, Offset);
-
+ const auto [Base, Offset] = selectADDR(Addr, CurDAG);
Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base,
Offset, Chain});
@@ -1708,58 +1758,6 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
return true;
}
-static inline bool isAddLike(const SDValue V) {
- return V.getOpcode() == ISD::ADD ||
- (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
-}
-
-// selectBaseADDR - Match a dag node which will serve as the base address for an
-// ADDR operand pair.
-static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
- if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
- return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
- GA->getValueType(0), GA->getOffset(),
- GA->getTargetFlags());
- if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
- return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
- ES->getTargetFlags());
- if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
- return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
-
- return N;
-}
-
-static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
- APInt AccumulatedOffset(64u, 0);
- while (isAddLike(Addr)) {
- const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (!CN)
- break;
-
- const APInt CI = CN->getAPIntValue().sext(64);
- if (!(CI + AccumulatedOffset).isSignedIntN(32))
- break;
-
- AccumulatedOffset += CI;
- Addr = Addr->getOperand(0);
- }
- return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
- MVT::i32);
-}
-
-// Select a pair of operands which represent a valid PTX address, this could be
-// one of the following things:
-// - [var] - Offset is simply set to 0
-// - [reg] - Offset is simply set to 0
-// - [reg+immOff]
-// - [var+immOff]
-// Note that immOff must fit into a 32-bit signed integer.
-bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG);
- Base = selectBaseADDR(Addr, CurDAG);
- return true;
-}
SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
if (V.getOpcode() == ISD::BITCAST)
@@ -1774,37 +1772,20 @@ SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
return V;
}
-bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
- unsigned int spN) const {
- const Value *Src = nullptr;
- if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
- if (spN == 0 && mN->getMemOperand()->getPseudoValue())
- return true;
- Src = mN->getMemOperand()->getValue();
- }
- if (!Src)
- return false;
- if (auto *PT = dyn_cast<PointerType>(Src->getType()))
- return (PT->getAddressSpace() == spN);
- return false;
-}
-
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
switch (ConstraintID) {
default:
return true;
- case InlineAsm::ConstraintCode::m: // memory
- if (SelectADDR(Op, Op0, Op1)) {
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
+ case InlineAsm::ConstraintCode::m: { // memory
+ const auto [Base, Offset] = selectADDR(Op, CurDAG);
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
return false;
}
- break;
}
return true;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 9e0f88e544980..357e915fd077e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -102,8 +102,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);
SDValue selectPossiblyImm(SDValue V);
- bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
-
// Returns the Memory Order and Scope that the PTX memory instruction should
// use, and inserts appropriate fence instruction before the memory
// instruction, if needed to implement the instructions memory order. Required
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6765ecb77da3a..71ae5118125b5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -148,13 +148,16 @@ class OneUse2<SDPatternOperator operator>
: PatFrag<(ops node:$A, node:$B), (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>;
-class fpimm_pos_inf<ValueType vt>
- : FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
-
class zeroinitializer<ValueType vt> :
PatLeaf<(vt (bitconvert (!cast<ValueType>("i" # vt.Size) 0)))>;
+def fpimm_pos_inf : FPImmLeaf<fAny, [{ return Imm.isPosInfinity(); }]>;
+def fpimm_0 : FPImmLeaf<fAny, [{ return Imm.isZero(); }]>;
+def fpimm_1 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(1.0); }]>;
+def fpimm_neg_1 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(-1.0); }]>;
+
+
// Operands which can hold a Register or an Immediate.
//
// Unfortunately, since most register classes can hold multiple types, we must
@@ -268,7 +271,7 @@ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
foreach t = [I16RT, I32RT, I64RT] in
- defm t.Ty# : I3Inst<op_str # t.Size, op_node, t, commutative>;
+ defm t.Size# : I3Inst<op_str # t.Size, op_node, t, commutative>;
}
class I16x2<string OpcStr, SDNode OpNode> :
@@ -757,8 +760,8 @@ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false>;
defm MULT : I3<"mul.lo.s", mul, commutative = true>;
-defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
-defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
+defm MUL_HI_S : I3<"mul.hi.s", mulhs, commutative = true>;
+defm MUL_HI_U : I3<"mul.hi.u", mulhu, commutative = true>;
defm SDIV : I3<"div.s", sdiv, commutative = false>;
defm UDIV : I3<"div.u", udiv, commutative = false>;
@@ -875,22 +878,6 @@ let Predicates = [hasOptEnabled] in {
// Floating Point Arithmetic
//-----------------------------------
-// Constant 1.0f
-def f32imm_1 : FPImmLeaf<f32, [{
- return &Imm.getSemantics() == &llvm::APFloat::IEEEsingle() &&
- Imm.convertToFloat() == 1.0f;
-}]>;
-// Constant 1.0 (double)
-def f64imm_1 : FPImmLeaf<f64, [{
- return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
- Imm.convertToDouble() == 1.0;
-}]>;
-// Constant -1.0 (double)
-def f64imm_neg1 : FPImmLeaf<f64, [{
- return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
- Imm.convertToDouble() == -1.0;
-}]>;
-
defm FADD : F3_fma_component<"add", fadd>;
defm FSUB : F3_fma_component<"sub", fsub>;
defm FMUL : F3_fma_component<"mul", fmul>;
@@ -950,7 +937,7 @@ def FRCP64r :
BasicNVPTXInst<(outs B64:$dst),
(ins B64:$b),
"rcp.rn.f64",
- [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
+ [(set f64:$dst, (fdiv fpimm_1, f64:$b))]>;
def FDIV64rr :
BasicNVPTXInst<(outs B64:$dst),
(ins B64:$a, B64:$b),
@@ -964,7 +951,7 @@ def FDIV64ri :
// fdiv will be converted to rcp
// fneg (fdiv 1.0, X) => fneg (rcp.rn X)
-def : Pat<(fdiv f64imm_neg1, f64:$b),
+def : Pat<(fdiv fpimm_neg_1, f64:$b),
(FNEGf64 (FRCP64r $b))>;
//
@@ -977,21 +964,21 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
}]>;
-def FRCP32_approx_r :
+def RCP_APPROX_F32_r :
BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$b), (ins FTZFlag:$ftz),
"rcp.approx$ftz.f32",
- [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
+ [(set f32:$dst, (fdiv_approx fpimm_1, f32:$b))]>;
//
// F32 Approximate division
//
-def FDIV32_approx_rr :
+def DIV_APPROX_F32_rr :
BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b), (ins FTZFlag:$ftz),
"div.approx$ftz.f32",
[(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
-def FDIV32_approx_ri :
+def DIV_APPROX_F32_ri :
BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, f32imm:$b), (ins FTZFlag:$ftz),
"div.approx$ftz.f32",
@@ -1008,8 +995,8 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
}]>;
-def : Pat<(fdiv_full f32imm_1, f32:$b),
- (FRCP32_approx_r $b)>;
+def : Pat<(fdiv_full fpimm_1, f32:$b),
+ (RCP_APPROX_F32_r $b)>;
//
// F32 Semi-accurate division
@@ -1037,7 +1024,7 @@ def FRCP32r_prec :
BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$b), (ins FTZFlag:$ftz),
"rcp.rn$ftz.f32",
- [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>;
+ [(set f32:$dst, (fdiv_ftz fpimm_1, f32:$b))]>;
//
// F32 Accurate division
//
@@ -1052,7 +1039,7 @@ def FDIV32ri_prec :
"div.rn$ftz.f32",
[(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>;
-def : Pat<(fdiv f32imm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
+def : Pat<(fdiv fpimm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
def : Pat<(fdiv f32:$a, f32:$b), (FDIV32rr_prec $a, $b, NoFTZ)>;
def : Pat<(fdiv f32:$a, fpimm:$b), (FDIV32ri_prec $a, fpimm:$b, NoFTZ)>;
@@ -1475,9 +1462,9 @@ def MmaCode : Operand<i32> {
// Get pointer to local stack.
let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num),
- "mov.b32 \t$d, __local_depot$num;", []>;
+ "mov.b32 \t$d, __local_depot$num;">;
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num),
- "mov.b64 \t$d, __local_depot$num;", []>;
+ "mov.b64 \t$d, __local_depot$num;">;
}
@@ -1533,9 +1520,9 @@ def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>;
//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
- "add.u32 \t$dst, ${addr:add};", []>;
+ "add.u32 \t$dst, ${addr:add};">;
def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
- "add.u64 \t$dst, ${addr:add};", []>;
+ "add.u64 \t$dst, ${addr:add};">;
def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
@@ -1612,12 +1599,12 @@ foreach is_convergent = [0, 1] in {
NVPTXInst<(outs),
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params,
i32imm:$proto),
- "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>;
+ "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;">;
def CALL_UNI # convergent_suffix :
NVPTXInst<(outs),
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
- "call.uni${rets:RetList} $addr, (${params:ParamList});", []>;
+ "call.uni${rets:RetList} $addr, (${params:ParamList});">;
}
defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
@@ -1633,10 +1620,10 @@ foreach is_convergent = [0, 1] in {
def DECLARE_PARAM_array :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
- ".param .align $align .b8 \t$a[$size];", []>;
+ ".param .align $align .b8 \t$a[$size];">;
def DECLARE_PARAM_scalar :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
- ".param .b$size \t$a;", []>;
+ ".param .b$size \t$a;">;
def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size),
(DECLARE_PARAM_array (to_texternsym $a), imm:$align, imm:$size)>;
@@ -1709,7 +1696,7 @@ class LD<NVPTXRegClass regclass>
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr];", []>;
+ "\t$dst, [$addr];">;
let mayLoad=1, hasSideEffects=0 in {
def LD_i16 : LD<B16>;
@@ -1724,7 +1711,7 @@ class ST<DAGOperand O>
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
- " \t[$addr], $src;", []>;
+ " \t[$addr], $src;">;
let mayStore=1, hasSideEffects=0 in {
def ST_i16 : ST<RI16>;
@@ -1741,13 +1728,13 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr];", []>;
+ "\t{{$dst1, $dst2}}, [$addr];">;
def _v4 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];">;
if support_v8 then
def _v8 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
@@ -1756,7 +1743,7 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
- "[$addr];", []>;
+ "[$addr];">;
}
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i16 : LD_VEC<B16>;
@@ -1771,14 +1758,14 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
- "\t[$addr], {{$src1, $src2}};", []>;
+ "\t[$addr], {{$src1, $src2}};">;
def _v4 : NVPTXInst<
(outs),
(ins O:$src1, O:$src2, O:$src3, O:$src4,
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
- "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr], {{$src1, $src2, $src3, $src4}};">;
if support_v8 then
def _v8 : NVPTXInst<
(outs),
@@ -1788,7 +1775,7 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
77706ef
to
051dca0
Compare
051dca0
to
330dfd4
Compare
09a8572
to
8b5e295
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a few distinct cleanup changes you're making throughout the PR. Could you outline that in the PR description?
def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED : BasicNVPTXInst<(outs), (ins i64imm:$n), "wgmma.wait_group.sync.aligned", | ||
[(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>, Requires<[hasSM90a, hasPTX<80>]>; | ||
def WGMMA_WAIT_GROUP_SYNC_ALIGNED : BasicNVPTXInst<(outs), (ins i64imm:$n), "wgmma.wait_group.sync.aligned", | ||
[(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>; | ||
} // isConvergent = true |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: update/delete comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated
if (spN == 0 && mN->getMemOperand()->getPseudoValue()) | ||
return true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's a pseudo value, and why don't we need this case any longer?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not really sure. In practice I think that for the cases where this pattern is used, this code might be dead. At least it doesn't seem to cause any issues to remove it...
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/195/builds/13155 Here is the relevant piece of the build log for the reference
|
No description provided.