Skip to content
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
a5f5f62
[NVPTX] Add syncscope support for cmpxchg
akshayrdeodhar May 9, 2025
15c4fa4
fix build and tests
akshayrdeodhar May 21, 2025
5c7d417
clang-format
akshayrdeodhar May 21, 2025
fbca61b
Update APIs in VE target
akshayrdeodhar May 30, 2025
13018b7
Fence scope is the same as cmpxchg scope- dont add API
akshayrdeodhar May 30, 2025
af29d07
test default syncscope
akshayrdeodhar May 30, 2025
1882525
[NVPTX] Add syncscope support for cmpxchg
akshayrdeodhar May 9, 2025
66d415f
clang-format
akshayrdeodhar May 30, 2025
c719cb2
black format
akshayrdeodhar May 30, 2025
9f167c0
address review comments
akshayrdeodhar May 30, 2025
d32fcb0
clang-format
akshayrdeodhar Jun 2, 2025
6b2e54c
Define new CMPXCHG instruction which takes sem, scope, and addressspa…
akshayrdeodhar Jun 30, 2025
28024fc
SM60 supports scope on atom.cas
akshayrdeodhar Jul 1, 2025
bbd7015
Replace old F_ATOMIC_3 completely with a single-opcode variation
akshayrdeodhar Jul 1, 2025
2a5d458
cleanup
akshayrdeodhar Jul 1, 2025
3722e80
clang-format
akshayrdeodhar Jul 1, 2025
dd6cb8c
No changes necessary to AtomicExpandPass
akshayrdeodhar Jul 1, 2025
91c38a5
formatting
akshayrdeodhar Jul 1, 2025
9ff3279
update tests after rebase
akshayrdeodhar Jul 1, 2025
acd31be
address review comments
akshayrdeodhar Jul 8, 2025
4fb5784
update tests, address review comments
akshayrdeodhar Jul 10, 2025
e007be2
remove unnecessary multiclass
akshayrdeodhar Jul 10, 2025
8a79f5d
only emit one slice of tests
akshayrdeodhar Jul 11, 2025
d76cb8b
black
akshayrdeodhar Jul 11, 2025
676b684
address review comments
akshayrdeodhar Jul 14, 2025
9bfa008
Update llvm/lib/Target/NVPTX/NVPTX.h
akshayrdeodhar Jul 15, 2025
5290112
Merge branch 'main' into upstream/cmpxchg-syncscope
akshayrdeodhar Jul 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,8 @@ void NVPTXInstPrinter::printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
llvm_unreachable("Empty Modifier");
}

void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
raw_ostream &O, StringRef Modifier) {
void NVPTXInstPrinter::printAtomicCode(const MCInst *MI, int OpNum,
raw_ostream &O, StringRef Modifier) {
const MCOperand &MO = MI->getOperand(OpNum);
int Imm = (int)MO.getImm();
if (Modifier == "sem") {
Expand All @@ -286,6 +286,12 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
case NVPTX::Ordering::Release:
O << ".release";
return;
case NVPTX::Ordering::AcquireRelease:
O << ".acq_rel";
return;
case NVPTX::Ordering::SequentiallyConsistent:
O << ".seq_cst";
return;
case NVPTX::Ordering::Volatile:
O << ".volatile";
return;
Expand All @@ -294,14 +300,14 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
return;
default:
report_fatal_error(formatv(
"NVPTX LdStCode Printer does not support \"{}\" sem modifier. "
"Loads/Stores cannot be AcquireRelease or SequentiallyConsistent.",
"NVPTX AtomicCode Printer does not support \"{}\" sem modifier. ",
OrderingToString(Ordering)));
}
} else if (Modifier == "scope") {
auto S = NVPTX::Scope(Imm);
switch (S) {
case NVPTX::Scope::Thread:
case NVPTX::Scope::DefaultDevice:
return;
case NVPTX::Scope::System:
O << ".sys";
Expand All @@ -316,9 +322,9 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
O << ".gpu";
return;
}
report_fatal_error(
formatv("NVPTX LdStCode Printer does not support \"{}\" sco modifier.",
ScopeToString(S)));
report_fatal_error(formatv(
"NVPTX AtomicCode Printer does not support \"{}\" scope modifier.",
ScopeToString(S)));
} else if (Modifier == "addsp") {
auto A = NVPTX::AddressSpace(Imm);
switch (A) {
Expand All @@ -334,7 +340,7 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
return;
}
report_fatal_error(formatv(
"NVPTX LdStCode Printer does not support \"{}\" addsp modifier.",
"NVPTX AtomicCode Printer does not support \"{}\" addsp modifier.",
AddressSpaceToString(A)));
} else if (Modifier == "sign") {
switch (Imm) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
StringRef Modifier = {});
void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
StringRef Modifier = {});
void printLdStCode(const MCInst *MI, int OpNum, raw_ostream &O,
StringRef Modifier = {});
void printAtomicCode(const MCInst *MI, int OpNum, raw_ostream &O,
StringRef Modifier = {});
void printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
StringRef Modifier = {});
void printMemOperand(const MCInst *MI, int OpNum, raw_ostream &O,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ enum Scope : ScopeUnderlyingType {
Cluster = 2,
Device = 3,
System = 4,
LASTSCOPE = System
DefaultDevice = 5,
LASTSCOPE = DefaultDevice
};

using AddressSpaceUnderlyingType = unsigned int;
Expand Down
51 changes: 43 additions & 8 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
return true;
}

static std::optional<unsigned> convertAS(unsigned AS) {
static std::optional<NVPTX::AddressSpace> convertAS(unsigned AS) {
switch (AS) {
case llvm::ADDRESS_SPACE_LOCAL:
return NVPTX::AddressSpace::Local;
Expand All @@ -508,11 +508,42 @@ static std::optional<unsigned> convertAS(unsigned AS) {
}
}

static unsigned int getCodeAddrSpace(const MemSDNode *N) {
/// Map the address space recorded on \p N's memory operand to the
/// corresponding NVPTX::AddressSpace. When convertAS() yields no mapping,
/// the generic address space is returned.
NVPTX::AddressSpace NVPTXDAGToDAGISel::getAddrSpace(const MemSDNode *N) {
  const auto MappedAS = convertAS(N->getMemOperand()->getAddrSpace());
  if (MappedAS)
    return *MappedAS;
  return NVPTX::AddressSpace::Generic;
}

/// Translate the merged atomic ordering of \p N into the NVPTX::Ordering
/// value used for the instruction's "sem" operand.
///
/// If the subtarget reports no memory-ordering support, NotAtomic is returned
/// regardless of the ordering carried by the node. Unordered and Monotonic
/// both map to Relaxed; every other ordering maps to its same-named NVPTX
/// counterpart.
NVPTX::Ordering NVPTXDAGToDAGISel::getMemOrder(const MemSDNode *N) const {
  // SM/PTX versions without memory ordering get no "sem" ordering at all.
  const bool SupportsSem = Subtarget->hasMemoryOrdering();
  if (!SupportsSem)
    return NVPTX::Ordering::NotAtomic;

  switch (N->getMergedOrdering()) {
  case AtomicOrdering::NotAtomic:
    return NVPTX::Ordering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
    return NVPTX::Ordering::Relaxed;
  case AtomicOrdering::Acquire:
    return NVPTX::Ordering::Acquire;
  case AtomicOrdering::Release:
    return NVPTX::Ordering::Release;
  case AtomicOrdering::AcquireRelease:
    return NVPTX::Ordering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return NVPTX::Ordering::SequentiallyConsistent;
  }
  llvm_unreachable("Invalid atomic ordering");
}

/// Return the NVPTX::Scope to encode on the atomic operation \p N.
///
/// When the subtarget supports scoped atomics, the node's sync-scope ID is
/// looked up in Scopes. Otherwise DefaultDevice is returned: no "scope"
/// modifier applies on such SM/PTX versions, and functionally these atomics
/// are at device scope.
NVPTX::Scope NVPTXDAGToDAGISel::getAtomicScope(const MemSDNode *N) const {
  if (Subtarget->hasAtomScope())
    return Scopes[N->getSyncScopeID()];
  return NVPTX::Scope::DefaultDevice;
}

namespace {

struct OperationOrderings {
Expand All @@ -525,7 +556,7 @@ struct OperationOrderings {
static OperationOrderings
getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
AtomicOrdering Ordering = N->getSuccessOrdering();
auto CodeAddrSpace = getCodeAddrSpace(N);
auto CodeAddrSpace = NVPTXDAGToDAGISel::getAddrSpace(N);

bool HasMemoryOrdering = Subtarget->hasMemoryOrdering();
bool HasRelaxedMMIO = Subtarget->hasRelaxedMMIO();
Expand Down Expand Up @@ -749,7 +780,7 @@ NVPTX::Scope NVPTXDAGToDAGISel::getOperationScope(MemSDNode *N,
}

static bool canLowerToLDG(const MemSDNode &N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace) {
NVPTX::AddressSpace CodeAddrSpace) {
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
// space.
return Subtarget.hasLDG() && CodeAddrSpace == NVPTX::AddressSpace::Global &&
Expand Down Expand Up @@ -781,6 +812,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_gpu
: NVPTX::INT_MEMBAR_GL;
case NVPTX::Scope::Thread:
case NVPTX::Scope::DefaultDevice:
report_fatal_error(
formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
ScopeToString(S)));
Expand All @@ -800,6 +832,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_gpu
: NVPTX::INT_MEMBAR_GL;
case NVPTX::Scope::Thread:
case NVPTX::Scope::DefaultDevice:
report_fatal_error(
formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
ScopeToString(S)));
Expand All @@ -819,6 +852,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu
: NVPTX::INT_MEMBAR_GL;
case NVPTX::Scope::Thread:
case NVPTX::Scope::DefaultDevice:
report_fatal_error(
formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
ScopeToString(S)));
Expand All @@ -839,6 +873,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu
: NVPTX::INT_MEMBAR_GL;
case NVPTX::Scope::Thread:
case NVPTX::Scope::DefaultDevice:
report_fatal_error(formatv("Unsupported scope \"{}\" for seq_cst fence.",
ScopeToString(S)));
}
Expand Down Expand Up @@ -1017,7 +1052,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
const MVT LoadedVT = LoadedEVT.getSimpleVT();

// Address Space Setting
const unsigned CodeAddrSpace = getCodeAddrSpace(LD);
const auto CodeAddrSpace = getAddrSpace(LD);
if (canLowerToLDG(*LD, *Subtarget, CodeAddrSpace))
return tryLDG(LD);

Expand Down Expand Up @@ -1089,7 +1124,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
const MVT MemVT = MemEVT.getSimpleVT();

// Address Space Setting
const unsigned CodeAddrSpace = getCodeAddrSpace(LD);
const auto CodeAddrSpace = getAddrSpace(LD);
if (canLowerToLDG(*LD, *Subtarget, CodeAddrSpace))
return tryLDG(LD);

Expand Down Expand Up @@ -1305,7 +1340,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
return false;

// Address Space Setting
const unsigned CodeAddrSpace = getCodeAddrSpace(ST);
const auto CodeAddrSpace = getAddrSpace(ST);

SDLoc DL(ST);
SDValue Chain = ST->getChain();
Expand Down Expand Up @@ -1355,7 +1390,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
assert(StoreVT.isSimple() && "Store value is not simple");

// Address Space Setting
const unsigned CodeAddrSpace = getCodeAddrSpace(ST);
const auto CodeAddrSpace = getAddrSpace(ST);
if (CodeAddrSpace == NVPTX::AddressSpace::Const) {
report_fatal_error("Cannot store to pointer that points to constant "
"memory space");
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
}
NVPTX::Ordering getMemOrder(const MemSDNode *N) const;
NVPTX::Scope getAtomicScope(const MemSDNode *N) const;

bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);
Expand All @@ -116,6 +118,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
std::pair<NVPTX::Ordering, NVPTX::Scope>
insertMemoryInstructionFence(SDLoc DL, SDValue &Chain, MemSDNode *N);
NVPTX::Scope getOperationScope(MemSDNode *N, NVPTX::Ordering O) const;

public:
static NVPTX::AddressSpace getAddrSpace(const MemSDNode *N);
};

class NVPTXDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
Expand Down
16 changes: 9 additions & 7 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6272,10 +6272,12 @@ Instruction *NVPTXTargetLowering::emitLeadingFence(IRBuilderBase &Builder,

// Specialize for cmpxchg
// Emit a fence.sc leading fence for cmpxchg seq_cst which are not emulated
SyncScope::ID SSID = cast<AtomicCmpXchgInst>(Inst)->getSyncScopeID();
if (isReleaseOrStronger(Ord))
return Ord == AtomicOrdering::SequentiallyConsistent
? Builder.CreateFence(AtomicOrdering::SequentiallyConsistent)
: Builder.CreateFence(AtomicOrdering::Release);
return Builder.CreateFence(Ord == AtomicOrdering::SequentiallyConsistent
? Ord
: AtomicOrdering::Release,
SSID);

return nullptr;
}
Expand All @@ -6287,15 +6289,15 @@ Instruction *NVPTXTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
if (!isa<AtomicCmpXchgInst>(Inst))
return TargetLoweringBase::emitTrailingFence(Builder, Inst, Ord);

auto *CI = cast<AtomicCmpXchgInst>(Inst);
auto CASWidth =
cast<IntegerType>(
dyn_cast<AtomicCmpXchgInst>(Inst)->getCompareOperand()->getType())
->getBitWidth();
cast<IntegerType>(CI->getCompareOperand()->getType())->getBitWidth();
SyncScope::ID SSID = CI->getSyncScopeID();
// Do not emit a trailing fence for cmpxchg seq_cst which are not emulated
if (isAcquireOrStronger(Ord) &&
(Ord != AtomicOrdering::SequentiallyConsistent ||
CASWidth < STI.getMinCmpXchgSizeInBits()))
return Builder.CreateFence(AtomicOrdering::Acquire);
return Builder.CreateFence(AtomicOrdering::Acquire, SSID);

return nullptr;
}
Expand Down
24 changes: 12 additions & 12 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1602,8 +1602,8 @@ def ADDR : Operand<pAny> {
let MIOperandInfo = (ops ADDR_base, i32imm);
}

def LdStCode : Operand<i32> {
let PrintMethod = "printLdStCode";
// i32 operand whose assembly text is produced by the instruction printer's
// printAtomicCode method. The load/store instruction definitions in this file
// use it for their $sem, $scope, $addsp and $Sign operands, selecting the
// rendering with the matching modifier in the asm string (e.g. ${sem:sem}).
def AtomicCode : Operand<i32> {
let PrintMethod = "printAtomicCode";
}

def MmaCode : Operand<i32> {
Expand Down Expand Up @@ -1948,7 +1948,7 @@ defm ProxyRegB64 : ProxyRegInst<"b64", B64>;
class LD<NVPTXRegClass regclass>
: NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Sign,
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
Expand All @@ -1964,7 +1964,7 @@ class ST<DAGOperand O>
: NVPTXInst<
(outs),
(ins O:$src,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$toWidth,
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
" \t[$addr], $src;", []>;
Expand All @@ -1982,21 +1982,21 @@ let mayStore=1, hasSideEffects=0 in {
multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
def _v2 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr];", []>;
def _v4 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
if support_v8 then
def _v8 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
regclass:$dst5, regclass:$dst6, regclass:$dst7, regclass:$dst8),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Sign,
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
Expand All @@ -2013,14 +2013,14 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
def _v2 : NVPTXInst<
(outs),
(ins O:$src1, O:$src2,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
"\t[$addr], {{$src1, $src2}};", []>;
def _v4 : NVPTXInst<
(outs),
(ins O:$src1, O:$src2, O:$src3, O:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
Expand All @@ -2029,7 +2029,7 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
(outs),
(ins O:$src1, O:$src2, O:$src3, O:$src4,
O:$src5, O:$src6, O:$src7, O:$src8,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v8.b$fromWidth "
"\t[$addr], "
Expand Down
Loading
Loading