[NVPTX] miscellaneous minor cleanup (NFC) #152329

AlexMaclean · 2025-08-06T16:00:42Z

No description provided.

llvmbot · 2025-08-06T16:01:14Z

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Patch is 82.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152329.diff

4 Files Affected:

(modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+68-87)
(modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (-2)
(modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+55-76)
(modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+253-487)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 6068035b2ee47..75e8635ec892f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1027,6 +1027,64 @@ pickOpcodeForVT(MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i16,
   }
 }
 
+static inline bool isAddLike(const SDValue V) {
+  return V.getOpcode() == ISD::ADD ||
+         (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
+}
+
+// selectBaseADDR - Match a dag node which will serve as the base address for an
+// ADDR operand pair.
+static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
+  if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
+    return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
+                                       GA->getValueType(0), GA->getOffset(),
+                                       GA->getTargetFlags());
+  if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
+    return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
+                                        ES->getTargetFlags());
+  if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
+    return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
+
+  return N;
+}
+
+static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
+  APInt AccumulatedOffset(64u, 0);
+  while (isAddLike(Addr)) {
+    const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+    if (!CN)
+      break;
+
+    const APInt CI = CN->getAPIntValue().sext(64);
+    if (!(CI + AccumulatedOffset).isSignedIntN(32))
+      break;
+
+    AccumulatedOffset += CI;
+    Addr = Addr->getOperand(0);
+  }
+  return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
+                                      MVT::i32);
+}
+
+static std::pair<SDValue, SDValue> selectADDR(SDValue Addr, SelectionDAG *DAG) {
+  SDValue Offset = accumulateOffset(Addr, SDLoc(Addr), DAG);
+  SDValue Base = selectBaseADDR(Addr, DAG);
+  return {Base, Offset};
+}
+
+// Select a pair of operands which represent a valid PTX address, this could be
+// one of the following things:
+//  - [var] - Offset is simply set to 0
+//  - [reg] - Offset is simply set to 0
+//  - [reg+immOff]
+//  - [var+immOff]
+// Note that immOff must fit into a 32-bit signed integer.
+bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
+                                   SDValue &Offset) {
+  std::tie(Base, Offset) = selectADDR(Addr, CurDAG);
+  return true;
+}
+
 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
   MemSDNode *LD = cast<MemSDNode>(N);
   assert(LD->readMem() && "Expected load");
@@ -1062,8 +1120,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
          FromTypeWidth <= 128 && "Invalid width for load");
 
   // Create the machine instruction DAG
-  SDValue Offset, Base;
-  SelectADDR(N->getOperand(1), Base, Offset);
+  const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
   SDValue Ops[] = {getI32Imm(Ordering, DL),
                    getI32Imm(Scope, DL),
                    getI32Imm(CodeAddrSpace, DL),
@@ -1144,8 +1201,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
   assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
          FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load");
 
-  SDValue Offset, Base;
-  SelectADDR(N->getOperand(1), Base, Offset);
+  const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
   SDValue Ops[] = {getI32Imm(Ordering, DL),
                    getI32Imm(Scope, DL),
                    getI32Imm(CodeAddrSpace, DL),
@@ -1213,8 +1269,7 @@ bool NVPTXDAGToDAGISel::tryLDG(MemSDNode *LD) {
   assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
          FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load");
 
-  SDValue Base, Offset;
-  SelectADDR(LD->getOperand(1), Base, Offset);
+  const auto [Base, Offset] = selectADDR(LD->getOperand(1), CurDAG);
   SDValue Ops[] = {getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base,
                    Offset, LD->getChain()};
 
@@ -1278,8 +1333,7 @@ bool NVPTXDAGToDAGISel::tryLDU(SDNode *N) {
   SDValue Addr =
       LD->getOperand(LD->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 2 : 1);
 
-  SDValue Base, Offset;
-  SelectADDR(Addr, Base, Offset);
+  const auto [Base, Offset] = selectADDR(Addr, CurDAG);
   SDValue Ops[] = {getI32Imm(FromTypeWidth, DL), Base, Offset, LD->getChain()};
 
   std::optional<unsigned> Opcode;
@@ -1339,9 +1393,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
   assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
          "Invalid width for store");
 
-  SDValue Offset, Base;
-  SelectADDR(ST->getBasePtr(), Base, Offset);
-
+  const auto [Base, Offset] = selectADDR(ST->getBasePtr(), CurDAG);
   SDValue Ops[] = {selectPossiblyImm(Value),
                    getI32Imm(Ordering, DL),
                    getI32Imm(Scope, DL),
@@ -1399,9 +1451,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
   assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
          TotalWidth <= 256 && "Invalid width for store");
 
-  SDValue Offset, Base;
-  SelectADDR(Addr, Base, Offset);
-
+  const auto [Base, Offset] = selectADDR(Addr, CurDAG);
   Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
               getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base,
               Offset, Chain});
@@ -1708,58 +1758,6 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
   return true;
 }
 
-static inline bool isAddLike(const SDValue V) {
-  return V.getOpcode() == ISD::ADD ||
-         (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
-}
-
-// selectBaseADDR - Match a dag node which will serve as the base address for an
-// ADDR operand pair.
-static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
-  if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
-    return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
-                                       GA->getValueType(0), GA->getOffset(),
-                                       GA->getTargetFlags());
-  if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
-    return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
-                                        ES->getTargetFlags());
-  if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
-    return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
-
-  return N;
-}
-
-static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
-  APInt AccumulatedOffset(64u, 0);
-  while (isAddLike(Addr)) {
-    const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
-    if (!CN)
-      break;
-
-    const APInt CI = CN->getAPIntValue().sext(64);
-    if (!(CI + AccumulatedOffset).isSignedIntN(32))
-      break;
-
-    AccumulatedOffset += CI;
-    Addr = Addr->getOperand(0);
-  }
-  return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
-                                      MVT::i32);
-}
-
-// Select a pair of operands which represent a valid PTX address, this could be
-// one of the following things:
-//  - [var] - Offset is simply set to 0
-//  - [reg] - Offset is simply set to 0
-//  - [reg+immOff]
-//  - [var+immOff]
-// Note that immOff must fit into a 32-bit signed integer.
-bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
-                                   SDValue &Offset) {
-  Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG);
-  Base = selectBaseADDR(Addr, CurDAG);
-  return true;
-}
 
 SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
   if (V.getOpcode() == ISD::BITCAST)
@@ -1774,37 +1772,20 @@ SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
   return V;
 }
 
-bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
-                                                 unsigned int spN) const {
-  const Value *Src = nullptr;
-  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
-    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
-      return true;
-    Src = mN->getMemOperand()->getValue();
-  }
-  if (!Src)
-    return false;
-  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
-    return (PT->getAddressSpace() == spN);
-  return false;
-}
-
 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 /// inline asm expressions.
 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
     std::vector<SDValue> &OutOps) {
-  SDValue Op0, Op1;
   switch (ConstraintID) {
   default:
     return true;
-  case InlineAsm::ConstraintCode::m: // memory
-    if (SelectADDR(Op, Op0, Op1)) {
-      OutOps.push_back(Op0);
-      OutOps.push_back(Op1);
+  case InlineAsm::ConstraintCode::m: { // memory 
+      const auto [Base, Offset] = selectADDR(Op, CurDAG);
+      OutOps.push_back(Base);
+      OutOps.push_back(Offset);
       return false;
     }
-    break;
   }
   return true;
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 9e0f88e544980..357e915fd077e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -102,8 +102,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
   SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);
   SDValue selectPossiblyImm(SDValue V);
 
-  bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
-
   // Returns the Memory Order and Scope that the PTX memory instruction should
   // use, and inserts appropriate fence instruction before the memory
   // instruction, if needed to implement the instructions memory order. Required
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6765ecb77da3a..71ae5118125b5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -148,13 +148,16 @@ class OneUse2<SDPatternOperator operator>
     : PatFrag<(ops node:$A, node:$B), (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>;
 
 
-class fpimm_pos_inf<ValueType vt>
-    : FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
-
 class zeroinitializer<ValueType vt> : 
   PatLeaf<(vt (bitconvert (!cast<ValueType>("i" # vt.Size) 0)))>;
 
 
+def fpimm_pos_inf : FPImmLeaf<fAny, [{ return Imm.isPosInfinity(); }]>;
+def fpimm_0 : FPImmLeaf<fAny, [{ return Imm.isZero(); }]>;
+def fpimm_1 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(1.0); }]>;
+def fpimm_neg_1 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(-1.0); }]>;
+
+
 // Operands which can hold a Register or an Immediate.
 //
 // Unfortunately, since most register classes can hold multiple types, we must
@@ -268,7 +271,7 @@ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
 // The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
 multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
   foreach t = [I16RT, I32RT, I64RT] in
-    defm t.Ty# : I3Inst<op_str # t.Size, op_node, t, commutative>;
+    defm t.Size# : I3Inst<op_str # t.Size, op_node, t, commutative>;
 }
 
 class I16x2<string OpcStr, SDNode OpNode> :
@@ -757,8 +760,8 @@ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false>;
 
 defm MULT : I3<"mul.lo.s", mul, commutative = true>;
 
-defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
-defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
+defm MUL_HI_S : I3<"mul.hi.s", mulhs, commutative = true>;
+defm MUL_HI_U : I3<"mul.hi.u", mulhu, commutative = true>;
 
 defm SDIV : I3<"div.s", sdiv, commutative = false>;
 defm UDIV : I3<"div.u", udiv, commutative = false>;
@@ -875,22 +878,6 @@ let Predicates = [hasOptEnabled] in {
 // Floating Point Arithmetic
 //-----------------------------------
 
-// Constant 1.0f
-def f32imm_1 : FPImmLeaf<f32, [{
-  return &Imm.getSemantics() == &llvm::APFloat::IEEEsingle() &&
-         Imm.convertToFloat() == 1.0f;
-}]>;
-// Constant 1.0 (double)
-def f64imm_1 : FPImmLeaf<f64, [{
-  return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
-         Imm.convertToDouble() == 1.0;
-}]>;
-// Constant -1.0 (double)
-def f64imm_neg1 : FPImmLeaf<f64, [{
-  return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
-         Imm.convertToDouble() == -1.0;
-}]>;
-
 defm FADD : F3_fma_component<"add", fadd>;
 defm FSUB : F3_fma_component<"sub", fsub>;
 defm FMUL : F3_fma_component<"mul", fmul>;
@@ -950,7 +937,7 @@ def FRCP64r :
   BasicNVPTXInst<(outs B64:$dst),
                  (ins B64:$b),
                  "rcp.rn.f64",
-                 [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
+                 [(set f64:$dst, (fdiv fpimm_1, f64:$b))]>;
 def FDIV64rr :
   BasicNVPTXInst<(outs B64:$dst),
                  (ins B64:$a, B64:$b),
@@ -964,7 +951,7 @@ def FDIV64ri :
 
 // fdiv will be converted to rcp
 // fneg (fdiv 1.0, X) => fneg (rcp.rn X)
-def : Pat<(fdiv f64imm_neg1, f64:$b),
+def : Pat<(fdiv fpimm_neg_1, f64:$b),
           (FNEGf64 (FRCP64r $b))>;
 
 //
@@ -977,21 +964,21 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
 }]>;
 
 
-def FRCP32_approx_r :
+def RCP_APPROX_F32_r :
   BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$b), (ins FTZFlag:$ftz),
                  "rcp.approx$ftz.f32",
-                 [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
+                 [(set f32:$dst, (fdiv_approx fpimm_1, f32:$b))]>;
 
 //
 // F32 Approximate division
 //
-def FDIV32_approx_rr :
+def DIV_APPROX_F32_rr :
   BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$a, B32:$b), (ins FTZFlag:$ftz),
                  "div.approx$ftz.f32",
                  [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
-def FDIV32_approx_ri :
+def DIV_APPROX_F32_ri :
   BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$a, f32imm:$b), (ins FTZFlag:$ftz),
                  "div.approx$ftz.f32",
@@ -1008,8 +995,8 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
 }]>;
 
 
-def : Pat<(fdiv_full f32imm_1, f32:$b),
-          (FRCP32_approx_r $b)>;
+def : Pat<(fdiv_full fpimm_1, f32:$b),
+          (RCP_APPROX_F32_r $b)>;
 
 //
 // F32 Semi-accurate division
@@ -1037,7 +1024,7 @@ def FRCP32r_prec :
   BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$b), (ins FTZFlag:$ftz),
                  "rcp.rn$ftz.f32",
-                 [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>;
+                 [(set f32:$dst, (fdiv_ftz fpimm_1, f32:$b))]>;
 //
 // F32 Accurate division
 //
@@ -1052,7 +1039,7 @@ def FDIV32ri_prec :
                  "div.rn$ftz.f32",
                  [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>;
 
-def : Pat<(fdiv f32imm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
+def : Pat<(fdiv fpimm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
 def : Pat<(fdiv f32:$a, f32:$b), (FDIV32rr_prec $a, $b, NoFTZ)>;
 def : Pat<(fdiv f32:$a, fpimm:$b), (FDIV32ri_prec $a, fpimm:$b, NoFTZ)>;
 
@@ -1475,9 +1462,9 @@ def MmaCode : Operand<i32> {
 // Get pointer to local stack.
 let hasSideEffects = false in {
   def MOV_DEPOT_ADDR :    NVPTXInst<(outs B32:$d), (ins i32imm:$num),
-                                     "mov.b32 \t$d, __local_depot$num;", []>;
+                                     "mov.b32 \t$d, __local_depot$num;">;
   def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num),
-                                    "mov.b64 \t$d, __local_depot$num;", []>;
+                                    "mov.b64 \t$d, __local_depot$num;">;
 }
 
 
@@ -1533,9 +1520,9 @@ def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>;
 
 //---- Copy Frame Index ----
 def LEA_ADDRi :   NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
-                            "add.u32 \t$dst, ${addr:add};", []>;
+                            "add.u32 \t$dst, ${addr:add};">;
 def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
-                            "add.u64 \t$dst, ${addr:add};", []>;
+                            "add.u64 \t$dst, ${addr:add};">;
 
 def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
 def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
@@ -1612,12 +1599,12 @@ foreach is_convergent = [0, 1] in {
       NVPTXInst<(outs),
                 (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params, 
                      i32imm:$proto),
-                "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>;
+                "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;">;
 
     def CALL_UNI # convergent_suffix :
       NVPTXInst<(outs),
                 (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
-                "call.uni${rets:RetList} $addr, (${params:ParamList});", []>;
+                "call.uni${rets:RetList} $addr, (${params:ParamList});">;
   }
 
   defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
@@ -1633,10 +1620,10 @@ foreach is_convergent = [0, 1] in {
 
 def DECLARE_PARAM_array :
   NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
-            ".param .align $align .b8 \t$a[$size];", []>;
+            ".param .align $align .b8 \t$a[$size];">;
 def DECLARE_PARAM_scalar :
   NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
-            ".param .b$size \t$a;", []>;
+            ".param .b$size \t$a;">;
 
 def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size),
           (DECLARE_PARAM_array (to_texternsym $a), imm:$align, imm:$size)>;
@@ -1709,7 +1696,7 @@ class LD<NVPTXRegClass regclass>
     (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
          i32imm:$fromWidth, ADDR:$addr),
     "ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth "
-    "\t$dst, [$addr];", []>;
+    "\t$dst, [$addr];">;
 
 let mayLoad=1, hasSideEffects=0 in {
   def LD_i16 : LD<B16>;
@@ -1724,7 +1711,7 @@ class ST<DAGOperand O>
          AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth,
          ADDR:$addr),
     "st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
-    " \t[$addr], $src;", []>;
+    " \t[$addr], $src;">;
 
 let mayStore=1, hasSideEffects=0 in {
   def ST_i16 : ST<RI16>;
@@ -1741,13 +1728,13 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
     (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
          AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
     "ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2}}, [$addr];", []>;
+    "\t{{$dst1, $dst2}}, [$addr];">;
   def _v4 : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
     (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
          AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
     "ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
+    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];">;
   if support_v8 then
     def _v8 : NVPTXInst<
       (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
@@ -1756,7 +1743,7 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
            i32imm:$fromWidth, ADDR:$addr),
       "ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
       "\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
-      "[$addr];", []>;
+      "[$addr];">;
 }
 let mayLoad=1, hasSideEffects=0 in {
   defm LDV_i16 : LD_VEC<B16>;
@@ -1771,14 +1758,14 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
          AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
          ADDR:$addr),
     "st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
-    "\t[$addr], {{$src1, $src2}};", []>;
+    "\t[$addr], {{$src1, $src2}};">;
   def _v4 : NVPTXInst<
     (outs),
     (ins O:$src1, O:$src2, O:$src3, O:$src4,
          AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
          ADDR:$addr),
     "st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
-    "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
+    "\t[$addr], {{$src1, $src2, $src3, $src4}};">;
   if support_v8 then
     def _v8 : NVPTXInst<
       (outs),
@@ -1788,7 +1775,7 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
        ...
[truncated]

github-actions · 2025-08-06T16:03:55Z

✅ With the latest revision this PR passed the C/C++ code formatter.

justinfargnoli

There are a few distinct cleanup changes you're making throughout the PR. Could you outline that in the PR description?

justinfargnoli · 2025-08-11T18:31:15Z

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

-def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED : BasicNVPTXInst<(outs), (ins i64imm:$n), "wgmma.wait_group.sync.aligned",
-                             [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>, Requires<[hasSM90a, hasPTX<80>]>;
+def WGMMA_WAIT_GROUP_SYNC_ALIGNED : BasicNVPTXInst<(outs), (ins i64imm:$n), "wgmma.wait_group.sync.aligned",
+                             [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>;
 } // isConvergent = true


Nit: update/delete comment

justinfargnoli · 2025-08-11T18:39:30Z

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

-    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
-      return true;


What's a pseudo value, and why don't we need this case any longer?

I'm not really sure. In practice I think that for the cases where this pattern is used, this code might be dead. At least it doesn't seem to cause any issues to remove it...

llvm-ci · 2025-08-13T02:33:40Z

LLVM Buildbot has detected a new failure on builder lldb-remote-linux-ubuntu running on as-builder-9 while building llvm at step 16 "test-check-lldb-api".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/195/builds/13155

Here is the relevant piece of the build log for the reference

Step 16 (test-check-lldb-api) failure: Test just built components: check-lldb-api completed (failure)
...
PASS: lldb-api :: functionalities/data-formatter/data-formatter-stl/generic/variant/TestDataFormatterStdVariant.py (431 of 1293)
PASS: lldb-api :: functionalities/data-formatter/frameformat_smallstruct/TestFrameFormatSmallStruct.py (432 of 1293)
PASS: lldb-api :: functionalities/data-formatter/hexcaps/TestDataFormatterHexCaps.py (433 of 1293)
PASS: lldb-api :: functionalities/data-formatter/language_category_updates/TestDataFormatterLanguageCategoryUpdates.py (434 of 1293)
UNSUPPORTED: lldb-api :: functionalities/data-formatter/nsarraysynth/TestNSArraySynthetic.py (435 of 1293)
UNSUPPORTED: lldb-api :: functionalities/data-formatter/nsdictionarysynth/TestNSDictionarySynthetic.py (436 of 1293)
UNSUPPORTED: lldb-api :: functionalities/data-formatter/nssetsynth/TestNSSetSynthetic.py (437 of 1293)
UNSUPPORTED: lldb-api :: functionalities/data-formatter/ostypeformatting/TestFormattersOsType.py (438 of 1293)
UNSUPPORTED: lldb-api :: functionalities/data-formatter/poarray/TestPrintObjectArray.py (439 of 1293)
UNRESOLVED: lldb-api :: functionalities/data-formatter/data-formatter-stl/generic/vector/TestDataFormatterStdVector.py (440 of 1293)
******************** TEST 'lldb-api :: functionalities/data-formatter/data-formatter-stl/generic/vector/TestDataFormatterStdVector.py' FAILED ********************
Script:
--
/usr/bin/python3.12 /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin --libcxx-include-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include/c++/v1 --libcxx-include-target-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include/aarch64-unknown-linux-gnu/c++/v1 --libcxx-library-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib/aarch64-unknown-linux-gnu --arch aarch64 --build-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin/lldb --compiler /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang --dsymutil /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin --lldb-obj-root /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/tools/lldb --lldb-libs-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib --cmake-build-type Release --platform-url connect://jetson-agx-2198.lab.llvm.org:1234 --platform-working-dir /home/ubuntu/lldb-tests --sysroot /mnt/fs/jetson-agx-ubuntu --env ARCH_CFLAGS=-mcpu=cortex-a78 --platform-name remote-linux --skip-category=lldb-server /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/vector -p TestDataFormatterStdVector.py
--
Exit Code: -11

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision 9e6b29137b92226e484d0328df79a04c39f01763)
  clang revision 9e6b29137b92226e484d0328df79a04c39f01763
  llvm revision 9e6b29137b92226e484d0328df79a04c39f01763

--
Command Output (stderr):
--
WARNING:root:Custom libc++ is not supported for remote runs: ignoring --libcxx arguments
UNSUPPORTED: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libcxx_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libcxx_dsym) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libcxx_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libcxx_dwarf)
PASS: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libcxx_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libcxx_dwo)
UNSUPPORTED: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libstdcxx_debug_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_debug_dsym) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libstdcxx_debug_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_debug_dwarf)
PASS: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libstdcxx_debug_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_debug_dwo)
UNSUPPORTED: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libstdcxx_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_dsym) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libstdcxx_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_dwarf)
PASS: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_libstdcxx_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_libstdcxx_dwo)
UNSUPPORTED: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_msvcstl_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_msvcstl_dsym) (test case does not fall in any category of interest for this run) 
UNSUPPORTED: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_msvcstl_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_msvcstl_dwarf) (test case does not fall in any category of interest for this run) 
UNSUPPORTED: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_msvcstl_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_msvcstl_dwo) (test case does not fall in any category of interest for this run) 
UNSUPPORTED: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_ref_and_ptr_libcxx_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_ref_and_ptr_libcxx_dsym) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_ref_and_ptr_libcxx_dwarf (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_ref_and_ptr_libcxx_dwarf)
PASS: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_ref_and_ptr_libcxx_dwo (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_ref_and_ptr_libcxx_dwo)
UNSUPPORTED: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_ref_and_ptr_libstdcxx_debug_dsym (TestDataFormatterStdVector.StdVectorDataFormatterTestCase.test_ref_and_ptr_libstdcxx_debug_dsym) (test case does not fall in any category of interest for this run) 
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Fatal Python error: Segmentation fault

Thread 0x0000720310874080 (most recent call first):
  File "/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/local/lib/python3.12/dist-packages/lldb/__init__.py", line 12992 in Launch
  File "/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/lldbutil.py", line 874 in run_to_breakpoint_do_run

AlexMaclean requested review from Artem-B and justinfargnoli August 6, 2025 16:00

AlexMaclean self-assigned this Aug 6, 2025

llvmbot added the backend:NVPTX label Aug 6, 2025

AlexMaclean force-pushed the dev/amaclean/ncf-fixup-upstream branch 3 times, most recently from 77706ef to 051dca0 Compare August 8, 2025 04:15

AlexMaclean requested a review from durga4github August 8, 2025 04:16

AlexMaclean force-pushed the dev/amaclean/ncf-fixup-upstream branch from 051dca0 to 330dfd4 Compare August 8, 2025 15:12

AlexMaclean added 2 commits August 8, 2025 15:14

nfc cleanup

2a216ae

nfc cleanup 2

543e3cf

AlexMaclean force-pushed the dev/amaclean/ncf-fixup-upstream branch 3 times, most recently from 09a8572 to 8b5e295 Compare August 11, 2025 17:36

nfc cleanup 3

8b5e295

justinfargnoli approved these changes Aug 11, 2025

View reviewed changes

AlexMaclean added 2 commits August 11, 2025 18:58

address comments

ce4e103

Merge branch 'main' into dev/amaclean/ncf-fixup-upstream

3115fba

AlexMaclean merged commit 9e6b291 into llvm:main Aug 13, 2025
9 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[NVPTX] miscellaneous minor cleanup (NFC) #152329

[NVPTX] miscellaneous minor cleanup (NFC) #152329

Uh oh!

AlexMaclean commented Aug 6, 2025

Uh oh!

llvmbot commented Aug 6, 2025

Uh oh!

github-actions bot commented Aug 6, 2025 •

edited

Loading

Uh oh!

justinfargnoli left a comment

Uh oh!

justinfargnoli Aug 11, 2025

Uh oh!

AlexMaclean Aug 11, 2025

Uh oh!

justinfargnoli Aug 11, 2025

Uh oh!

AlexMaclean Aug 11, 2025

Uh oh!

Uh oh!

llvm-ci commented Aug 13, 2025

Uh oh!

Uh oh!

		if (spN == 0 && mN->getMemOperand()->getPseudoValue())
		return true;

[NVPTX] miscellaneous minor cleanup (NFC) #152329

[NVPTX] miscellaneous minor cleanup (NFC) #152329

Uh oh!

Conversation

AlexMaclean commented Aug 6, 2025

Uh oh!

llvmbot commented Aug 6, 2025

Uh oh!

github-actions bot commented Aug 6, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

justinfargnoli left a comment

Choose a reason for hiding this comment

Uh oh!

justinfargnoli Aug 11, 2025

Choose a reason for hiding this comment

Uh oh!

AlexMaclean Aug 11, 2025

Choose a reason for hiding this comment

Uh oh!

justinfargnoli Aug 11, 2025

Choose a reason for hiding this comment

Uh oh!

AlexMaclean Aug 11, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Aug 13, 2025

Uh oh!

Uh oh!

github-actions bot commented Aug 6, 2025 •

edited

Loading