Skip to content

Commit 6b2e54c

Browse files
Define a new CMPXCHG instruction that takes sem, scope, and address space as constant operands; simplify codegen
1 parent d32fcb0 commit 6b2e54c

File tree

5 files changed

+101
-28
lines changed

5 files changed

+101
-28
lines changed

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ void NVPTXInstPrinter::printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
268268
llvm_unreachable("Empty Modifier");
269269
}
270270

271-
void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
271+
void NVPTXInstPrinter::printAtomicCode(const MCInst *MI, int OpNum,
272272
raw_ostream &O, StringRef Modifier) {
273273
const MCOperand &MO = MI->getOperand(OpNum);
274274
int Imm = (int)MO.getImm();
@@ -286,6 +286,12 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
286286
case NVPTX::Ordering::Release:
287287
O << ".release";
288288
return;
289+
case NVPTX::Ordering::AcquireRelease:
290+
O << ".acq_rel";
291+
return;
292+
case NVPTX::Ordering::SequentiallyConsistent:
293+
O << ".seq_cst";
294+
return;
289295
case NVPTX::Ordering::Volatile:
290296
O << ".volatile";
291297
return;
@@ -294,8 +300,7 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
294300
return;
295301
default:
296302
report_fatal_error(formatv(
297-
"NVPTX LdStCode Printer does not support \"{}\" sem modifier. "
298-
"Loads/Stores cannot be AcquireRelease or SequentiallyConsistent.",
303+
"NVPTX AtomicCode Printer does not support \"{}\" sem modifier. ",
299304
OrderingToString(Ordering)));
300305
}
301306
} else if (Modifier == "scope") {
@@ -317,7 +322,7 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
317322
return;
318323
}
319324
report_fatal_error(
320-
formatv("NVPTX LdStCode Printer does not support \"{}\" sco modifier.",
325+
formatv("NVPTX AtomicCode Printer does not support \"{}\" scope modifier.",
321326
ScopeToString(S)));
322327
} else if (Modifier == "addsp") {
323328
auto A = NVPTX::AddressSpace(Imm);
@@ -334,7 +339,7 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
334339
return;
335340
}
336341
report_fatal_error(formatv(
337-
"NVPTX LdStCode Printer does not support \"{}\" addsp modifier.",
342+
"NVPTX AtomicCode Printer does not support \"{}\" addsp modifier.",
338343
AddressSpaceToString(A)));
339344
} else if (Modifier == "sign") {
340345
switch (Imm) {

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class NVPTXInstPrinter : public MCInstPrinter {
4040
StringRef Modifier = {});
4141
void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
4242
StringRef Modifier = {});
43-
void printLdStCode(const MCInst *MI, int OpNum, raw_ostream &O,
43+
void printAtomicCode(const MCInst *MI, int OpNum, raw_ostream &O,
4444
StringRef Modifier = {});
4545
void printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
4646
StringRef Modifier = {});

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) {
304304
}
305305
}
306306

307+
307308
bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
308309
unsigned IID = N->getConstantOperandVal(1);
309310
switch (IID) {
@@ -513,6 +514,40 @@ static unsigned int getCodeAddrSpace(const MemSDNode *N) {
513514
.value_or(NVPTX::AddressSpace::Generic);
514515
}
515516

517+
unsigned int NVPTXDAGToDAGISel::getAddrSpace(const MemSDNode *N) const {
518+
return convertAS(N->getMemOperand()->getAddrSpace())
519+
.value_or(NVPTX::AddressSpace::Generic);
520+
}
521+
522+
unsigned int NVPTXDAGToDAGISel::getMemOrder(const MemSDNode *N) const {
523+
// No "sem" orderings for SM/PTX versions which do not support memory ordering
524+
if (!Subtarget->hasMemoryOrdering())
525+
return NVPTX::Ordering::NotAtomic;
526+
auto Ordering = N->getMergedOrdering();
527+
switch (Ordering) {
528+
case AtomicOrdering::NotAtomic:
529+
case AtomicOrdering::Unordered:
530+
return NVPTX::Ordering::NotAtomic;
531+
case AtomicOrdering::Monotonic:
532+
return NVPTX::Ordering::Relaxed;
533+
case AtomicOrdering::Acquire:
534+
return NVPTX::Ordering::Acquire;
535+
case AtomicOrdering::Release:
536+
return NVPTX::Ordering::Release;
537+
case AtomicOrdering::AcquireRelease:
538+
return NVPTX::Ordering::AcquireRelease;
539+
case AtomicOrdering::SequentiallyConsistent:
540+
return NVPTX::Ordering::SequentiallyConsistent;
541+
}
542+
}
543+
544+
unsigned int NVPTXDAGToDAGISel::getAtomicScope(const MemSDNode *N) const {
545+
// No "scope" modifier for SM/PTX versions which do not support scoped atomics
546+
if (!Subtarget->hasAtomScope() || !Subtarget->hasMemoryOrdering())
547+
return NVPTX::Scope::Thread;
548+
return Scopes[N->getSyncScopeID()];
549+
}
550+
516551
namespace {
517552

518553
struct OperationOrderings {

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
102102
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
103103
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
104104
}
105+
unsigned int getAddrSpace(const MemSDNode *N) const;
106+
unsigned int getMemOrder(const MemSDNode *N) const;
107+
unsigned int getAtomicScope(const MemSDNode *N) const;
105108

106109
bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
107110
SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,6 +1873,54 @@ multiclass F_ATOMIC_3<RegTyInfo t, string sem_str, string as_str, string op_str,
18731873
}
18741874
}
18751875

1876+
// Three-operand atomic instruction (address plus two value operands) whose
// memory semantics, scope and address space are carried as AtomicCode
// immediates and rendered through the sem/scope/addsp modifiers of the asm
// string. One record is defined per register/immediate combination of $b, $c.
multiclass F_ATOMIC_3_MANYOPERAND<RegTyInfo t, string op_str> {
  defvar asm_str = "atom${sem:sem}${scope:scope}${addsp:addsp}" # op_str # " \t$dst, [$addr], $b, $c;";

  // Trailing modifier operands shared by every variant.
  defvar mods = (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp);

  let hasSideEffects = 1, mayStore = 1, mayLoad = 1 in {
    // $b register, $c register.
    def _rr : NVPTXInst<(outs t.RC:$dst),
                        !con((ins ADDR:$addr, t.RC:$b, t.RC:$c), mods),
                        asm_str, []>;

    // $b immediate, $c register.
    def _ir : NVPTXInst<(outs t.RC:$dst),
                        !con((ins ADDR:$addr, t.Imm:$b, t.RC:$c), mods),
                        asm_str, []>;

    // $b register, $c immediate.
    def _ri : NVPTXInst<(outs t.RC:$dst),
                        !con((ins ADDR:$addr, t.RC:$b, t.Imm:$c), mods),
                        asm_str, []>;

    // $b immediate, $c immediate.
    def _ii : NVPTXInst<(outs t.RC:$dst),
                        !con((ins ADDR:$addr, t.Imm:$b, t.Imm:$c), mods),
                        asm_str, []>;
  }
}
1897+
1898+
// Selection patterns for the instructions named InstructionName#{_rr,_ir,_ri,_ii}.
// `op` is the pattern operator to match; `atomic` is the SDNode the transforms
// below are attached to. Each pattern binds the matched node as $this and
// derives the sem/scope/addsp immediates from it through the ISel helpers
// getMemOrder, getAtomicScope and getAddrSpace.
multiclass F_ATOMIC_3_MANYOPERAND_PATTERN<RegTyInfo t, string InstructionName, SDPatternOperator op, SDNode atomic> {
  // Memory-ordering ("sem") immediate of the matched node.
  defvar GetSem = SDNodeXForm<atomic, [{
    return getI32Imm(getMemOrder(cast<MemSDNode>(N)), SDLoc(N));
  }]>;

  // Scope immediate of the matched node.
  defvar GetScope = SDNodeXForm<atomic, [{
    return getI32Imm(getAtomicScope(cast<MemSDNode>(N)), SDLoc(N));
  }]>;

  // Address-space immediate of the matched node.
  defvar GetAddSp = SDNodeXForm<atomic, [{
    return getI32Imm(getAddrSpace(cast<MemSDNode>(N)), SDLoc(N));
  }]>;

  // $b register, $c register.
  def : Pat<(op:$this addr:$addr, t.Ty:$b, t.Ty:$c),
            (!cast<Instruction>(InstructionName#_rr) ADDR:$addr, t.Ty:$b, t.Ty:$c, (GetSem $this), (GetScope $this), (GetAddSp $this))>;

  // $b immediate, $c register.
  def : Pat<(op:$this addr:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c),
            (!cast<Instruction>(InstructionName#_ir) ADDR:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c, (GetSem $this), (GetScope $this), (GetAddSp $this))>;

  // $b register, $c immediate. The suffix is spelled `#_ri` (not `#_#ri`) to
  // match the _rr/_ir patterns; the resulting record name is the same.
  def : Pat<(op:$this addr:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c)),
            (!cast<Instruction>(InstructionName#_ri) ADDR:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c), (GetSem $this), (GetScope $this), (GetAddSp $this))>;

  // $b immediate, $c immediate.
  def : Pat<(op:$this addr:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c)),
            (!cast<Instruction>(InstructionName#_ii) ADDR:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c), (GetSem $this), (GetScope $this), (GetAddSp $this))>;
}
1923+
18761924
multiclass F_ATOMIC_2_AS<RegTyInfo t, SDPatternOperator frag, string op_str, list<Predicate> preds = []> {
18771925
defvar frag_pat = (frag node:$a, node:$b);
18781926
defm _G : F_ATOMIC_2<t, "", ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
@@ -1934,29 +1982,11 @@ defm INT_PTX_ATOM_XOR_64 : F_ATOMIC_2_AS<I64RT, atomic_load_xor_i64, "xor.b64",
19341982
// Define atom.cas for all combinations of size x addrspace x memory order
19351983
// supported in PTX *and* on the hardware.
19361984
foreach t = [I32RT, I64RT] in {
1937-
foreach order = ["acquire", "release", "acq_rel", "monotonic"] in {
1938-
defvar cas_order_string = !if(!eq(order, "monotonic"), ".relaxed", "."#order);
1939-
defvar atomic_cmp_swap_pat = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size#_#order);
1940-
1941-
// Instantiate scoped versions of the atomic compare and swap pattern
1942-
defm atomic_cmp_swap_i#t.Size#_#order: nvvm_ternary_atomic_op_scoped<atomic_cmp_swap_pat>;
1943-
1944-
foreach scope = ["cta", "cluster", "gpu", "sys"] in {
1945-
defvar atomic_cmp_swap_pat_scoped = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size#_#order#_#scope);
1985+
defvar atomic_cmp_swap_pat = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size);
1986+
defm INT_PTX_ATOM_CAS_#t.Size
1987+
: F_ATOMIC_3_MANYOPERAND<t, ".cas.b"#t.Size>;
19461988

1947-
// Syncscope is only supported for SM70+
1948-
defm INT_PTX_ATOM_CAS_#t.Size#_#order#_#scope
1949-
: F_ATOMIC_3_AS<t, atomic_cmp_swap_pat_scoped, "."#scope, cas_order_string, "cas.b"#t.Size, [hasSM<70>, hasPTX<63>]>;
1950-
}
1951-
1952-
// Note that AtomicExpand will convert cmpxchg seq_cst to a cmpxchg monotonic with fences around it.
1953-
// Memory orders are only supported for SM70+, PTX63+- so we have two sets of instruction definitions-
1954-
// for SM70+, and "old" ones which lower to "atom.cas", for earlier archs.
1955-
defm INT_PTX_ATOM_CAS_#t.Size#_#order
1956-
: F_ATOMIC_3_AS<t, atomic_cmp_swap_pat, "", cas_order_string, "cas.b"#t.Size, [hasSM<70>, hasPTX<63>]>;
1957-
defm INT_PTX_ATOM_CAS_#t.Size#_#order#_old
1958-
: F_ATOMIC_3_AS<t, atomic_cmp_swap_pat, "", "", "cas.b"#t.Size, []>;
1959-
}
1989+
defm INT_PTX_ATOM_CAS_PAT_#t.Size : F_ATOMIC_3_MANYOPERAND_PATTERN<t, "INT_PTX_ATOM_CAS_"#t.Size, atomic_cmp_swap_pat, atomic_cmp_swap>;
19601990
}
19611991

19621992
// Note that 16-bit CAS support in PTX is emulated.

0 commit comments

Comments
 (0)