Skip to content

Commit bbd7015

Browse files
Replace old F_ATOMIC_3 completely with a single-opcode variation
1 parent 28024fc commit bbd7015

File tree

1 file changed

+69
-74
lines changed

1 file changed

+69
-74
lines changed

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 69 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,45 @@ def AS_match {
4141
}];
4242
}
4343

44+
45+
//===----------------------------------------------------------------------===//
46+
// NVPTX Scope Constants
47+
// These map to the Scope enum in NVPTX.h
48+
//===----------------------------------------------------------------------===//
49+
50+
def Scope_thread : PatLeaf<(i32 0)>; // Thread = 0
51+
def Scope_cta : PatLeaf<(i32 1)>; // Block = 1
52+
def Scope_cluster : PatLeaf<(i32 2)>; // Cluster = 2
53+
def Scope_device : PatLeaf<(i32 3)>; // Device = 3
54+
def Scope_sys : PatLeaf<(i32 4)>; // System = 4
55+
56+
//===----------------------------------------------------------------------===//
57+
// NVPTX Address Space Constants
58+
// These map to the AddressSpace enum in NVPTX.h
59+
//===----------------------------------------------------------------------===//
60+
61+
def AddrSpace_gen : PatLeaf<(i32 0)>; // Generic = 0
62+
def AddrSpace_global : PatLeaf<(i32 1)>; // Global = 1
63+
def AddrSpace_shared : PatLeaf<(i32 3)>; // Shared = 3
64+
def AddrSpace_const : PatLeaf<(i32 4)>; // Const = 4
65+
def AddrSpace_local : PatLeaf<(i32 5)>; // Local = 5
66+
def AddrSpace_shared_cluster : PatLeaf<(i32 7)>; // SharedCluster = 7
67+
def AddrSpace_param : PatLeaf<(i32 101)>; // Param = 101
68+
69+
//===----------------------------------------------------------------------===//
70+
// NVPTX Ordering Constants
71+
// These map to the Ordering enum in NVPTX.h
72+
//===----------------------------------------------------------------------===//
73+
74+
def Ordering_not_atomic : PatLeaf<(i32 0)>; // NotAtomic = 0
75+
def Ordering_relaxed : PatLeaf<(i32 2)>; // Relaxed = 2
76+
def Ordering_acquire : PatLeaf<(i32 4)>; // Acquire = 4
77+
def Ordering_release : PatLeaf<(i32 5)>; // Release = 5
78+
def Ordering_acquire_release : PatLeaf<(i32 6)>; // AcquireRelease = 6
79+
def Ordering_sequentially_consistent : PatLeaf<(i32 7)>; // SequentiallyConsistent = 7
80+
def Ordering_volatile : PatLeaf<(i32 8)>; // Volatile = 8
81+
def Ordering_relaxed_mmio : PatLeaf<(i32 9)>; // RelaxedMMIO = 9
82+
4483
multiclass nvvm_ternary_atomic_op_scoped<SDPatternOperator frag> {
4584
defvar frag_pat = (frag node:$ptr, node:$cmp, node:$val);
4685
def NAME#_cta: PatFrag<!setdagop(frag_pat, ops),
@@ -1842,60 +1881,29 @@ multiclass F_ATOMIC_2<RegTyInfo t, string sem_str, string as_str, string op_str,
18421881
}
18431882
}
18441883

1845-
// has 3 operands
1846-
multiclass F_ATOMIC_3<RegTyInfo t, string sem_str, string as_str, string op_str,
1847-
SDPatternOperator op, list<Predicate> preds> {
1848-
defvar asm_str = "atom" # sem_str # as_str # "." # op_str;
1849-
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
1850-
def rr : BasicNVPTXInst<(outs t.RC:$dst),
1851-
(ins ADDR:$addr, t.RC:$b, t.RC:$c),
1852-
asm_str,
1853-
[(set t.Ty:$dst, (op addr:$addr, t.Ty:$b, t.Ty:$c))]>,
1854-
Requires<preds>;
1855-
1856-
def ir : BasicNVPTXInst<(outs t.RC:$dst),
1857-
(ins ADDR:$addr, t.Imm:$b, t.RC:$c),
1858-
asm_str,
1859-
[(set t.Ty:$dst, (op addr:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c))]>,
1860-
Requires<preds>;
1861-
1862-
def ri : BasicNVPTXInst<(outs t.RC:$dst),
1863-
(ins ADDR:$addr, t.RC:$b, t.Imm:$c),
1864-
asm_str,
1865-
[(set t.Ty:$dst, (op addr:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c)))]>,
1866-
Requires<preds>;
1867-
1868-
def ii : BasicNVPTXInst<(outs t.RC:$dst),
1869-
(ins ADDR:$addr, t.Imm:$b, t.Imm:$c),
1870-
asm_str,
1871-
[(set t.Ty:$dst, (op addr:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c)))]>,
1872-
Requires<preds>;
1873-
}
1874-
}
1875-
1876-
multiclass F_ATOMIC_3_MANYOPERAND<RegTyInfo t, string op_str> {
1877-
defvar asm_str = "atom${sem:sem}${scope:scope}${addsp:addsp}" # op_str # " \t$dst, [$addr], $b, $c;";
1884+
multiclass F_ATOMIC_3<RegTyInfo t, string op_str> {
1885+
// Mnemonic only: the ${sem}/${scope}/${addsp} modifiers are printed from the
// AtomicCode operands, followed by the opcode suffix passed in op_str.
defvar asm_str = "atom${sem:sem}${scope:scope}${addsp:addsp}" # op_str;
18781886

18791887
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
1880-
def _rr : NVPTXInst<(outs t.RC:$dst),
1888+
def _rr : BasicNVPTXInst<(outs t.RC:$dst),
18811889
(ins ADDR:$addr, t.RC:$b, t.RC:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
18821890
asm_str, []>;
18831891

1884-
def _ir : NVPTXInst<(outs t.RC:$dst),
1892+
def _ir : BasicNVPTXInst<(outs t.RC:$dst),
18851893
(ins ADDR:$addr, t.Imm:$b, t.RC:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
18861894
asm_str, []>;
18871895

1888-
def _ri : NVPTXInst<(outs t.RC:$dst),
1896+
def _ri : BasicNVPTXInst<(outs t.RC:$dst),
18891897
(ins ADDR:$addr, t.RC:$b, t.Imm:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
18901898
asm_str, []>;
18911899

1892-
def _ii : NVPTXInst<(outs t.RC:$dst),
1900+
def _ii : BasicNVPTXInst<(outs t.RC:$dst),
18931901
(ins ADDR:$addr, t.Imm:$b, t.Imm:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
18941902
asm_str, []>;
18951903
}
18961904
}
18971905

1898-
multiclass F_ATOMIC_3_MANYOPERAND_PATTERN<RegTyInfo t, string InstructionName, SDPatternOperator op, SDNode atomic> {
1906+
multiclass F_ATOMIC_3_PATTERN<RegTyInfo t, string InstructionName, SDPatternOperator op, SDNode atomic> {
18991907
defvar GetSem = SDNodeXForm<atomic, [{
19001908
return getI32Imm(getMemOrder(cast<MemSDNode>(N)), SDLoc(N));
19011909
}]>;
@@ -1929,14 +1937,6 @@ multiclass F_ATOMIC_2_AS<RegTyInfo t, SDPatternOperator frag, string op_str, lis
19291937
defm _GEN : F_ATOMIC_2<t, "", "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
19301938
}
19311939

1932-
multiclass F_ATOMIC_3_AS<RegTyInfo t, SDPatternOperator frag, string scope_str, string sem_str, string op_str, list<Predicate> preds = []> {
1933-
defvar frag_pat = (frag node:$a, node:$b, node:$c);
1934-
defm _G : F_ATOMIC_3<t, sem_str, scope_str, ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
1935-
defm _S : F_ATOMIC_3<t, sem_str, scope_str, ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
1936-
defm _S_C : F_ATOMIC_3<t, sem_str, scope_str, ".shared::cluster", op_str, ATOMIC_SHARED_CLUSTER_CHK<frag_pat>, !listconcat([hasClusters], preds)>;
1937-
defm _GEN : F_ATOMIC_3<t, sem_str, scope_str, "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
1938-
}
1939-
19401940
// atom_add
19411941
defm INT_PTX_ATOM_ADD_32 : F_ATOMIC_2_AS<I32RT, atomic_load_add_i32, "add.u32">;
19421942
defm INT_PTX_ATOM_ADD_64 : F_ATOMIC_2_AS<I64RT, atomic_load_add_i64, "add.u64">;
@@ -1981,17 +1981,14 @@ defm INT_PTX_ATOM_XOR_64 : F_ATOMIC_2_AS<I64RT, atomic_load_xor_i64, "xor.b64",
19811981

19821982
// Define atom.cas for all combinations of size x addrspace x memory order
19831983
// supported in PTX *and* on the hardware.
1984-
foreach t = [I32RT, I64RT] in {
1984+
foreach t = [I16RT, I32RT, I64RT] in {
19851985
defvar atomic_cmp_swap_pat = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size);
19861986
defm INT_PTX_ATOM_CAS_#t.Size
1987-
: F_ATOMIC_3_MANYOPERAND<t, ".cas.b"#t.Size>;
1987+
: F_ATOMIC_3<t, ".cas.b"#t.Size>;
19881988

1989-
defm INT_PTX_ATOM_CAS_PAT_#t.Size : F_ATOMIC_3_MANYOPERAND_PATTERN<t, "INT_PTX_ATOM_CAS_"#t.Size, atomic_cmp_swap_pat, atomic_cmp_swap>;
1989+
defm INT_PTX_ATOM_CAS_PAT_#t.Size : F_ATOMIC_3_PATTERN<t, "INT_PTX_ATOM_CAS_"#t.Size, atomic_cmp_swap_pat, atomic_cmp_swap>;
19901990
}
19911991

1992-
// Note that 16-bit CAS support in PTX is emulated.
1993-
defm INT_PTX_ATOM_CAS_16 : F_ATOMIC_3_AS<I16RT, atomic_cmp_swap_i16, "", "", "cas.b16", [hasSM<70>, hasPTX<63>]>;
1994-
19951992
// Support for scoped atomic operations. Matches
19961993
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
19971994
// and converts it into the appropriate instruction.
@@ -2015,20 +2012,6 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
20152012
# !if(!empty(ScopeStr), "", "_" # ScopeStr)),
20162013
preds = Preds>;
20172014
}
2018-
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
2019-
string ScopeStr, string SpaceStr,
2020-
RegTyInfo t, list<Predicate> Preds> {
2021-
defm "" : F_ATOMIC_3<t,
2022-
as_str = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr),
2023-
sem_str = "",
2024-
scope_str = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr),
2025-
op_str = OpStr # "." # TypeStr,
2026-
op = !cast<Intrinsic>(
2027-
"int_nvvm_atomic_" # OpStr
2028-
# "_" # SpaceStr # "_" # IntTypeStr
2029-
# !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2030-
preds = Preds>;
2031-
}
20322015

20332016
// Constructs variants for different scopes of atomic op.
20342017
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
@@ -2043,15 +2026,27 @@ multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
20432026
}
20442027
}
20452028
}
2046-
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
2047-
RegTyInfo t, list<Predicate> Preds> {
2048-
// No need to define ".gpu"-scoped atomics. They do the same thing
2049-
// as the regular, non-scoped atomics defined elsewhere.
2029+
2030+
// Selection patterns that map the scoped ternary atomic intrinsics
//   int_nvvm_atomic_<OpStr>_<space>_<IntTypeStr>_<scope>
// onto the single-opcode atomic instructions whose names start with
// InstructionName.
//
//   t               - register/type info for the $b and $c value operands.
//   OpStr           - operation name used in the intrinsic name (e.g. "cas").
//   InstructionName - prefix of the target instruction records; the suffixes
//                     _rr/_ir/_ri/_ii select register vs. immediate forms of
//                     $b and $c respectively.
//   IntTypeStr      - type component of the intrinsic name.
//
// The ordering operand is always Ordering_not_atomic; the scope and address
// space operands are the Scope_<scope> / AddrSpace_<space> PatLeaf constants.
multiclass F_ATOMIC_3_INTRINSIC_PATTERN<RegTyInfo t, string OpStr, string InstructionName, string IntTypeStr> {
  foreach scope = ["cta", "sys"] in {
    foreach space = ["gen"] in {
      // Merge conflict resolved in favour of the incoming change: the HEAD
      // side instantiated ATOM3N_impl with TypeStr/Preds, none of which exist
      // any more after F_ATOMIC_3 became a single-opcode multiclass.
      // NOTE(review): the HEAD side gated these on hasAtomScope; the patterns
      // below carry no predicate -- confirm whether Requires<[hasAtomScope]>
      // should be reinstated.
      defvar intrinsic = !cast<SDPatternOperator>("int_nvvm_atomic_" # OpStr # "_" # space # "_" # IntTypeStr # "_" # scope);
      defvar scope_leaf = !cast<PatLeaf>("Scope_" # scope);
      defvar space_leaf = !cast<PatLeaf>("AddrSpace_" # space);

      // $b register, $c register.
      def : Pat<(t.Ty (intrinsic addr:$addr, t.Ty:$b, t.Ty:$c)),
                (!cast<Instruction>(InstructionName # "_rr") ADDR:$addr, t.Ty:$b, t.Ty:$c,
                 Ordering_not_atomic, scope_leaf, space_leaf)>;

      // $b immediate, $c register.
      def : Pat<(t.Ty (intrinsic addr:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c)),
                (!cast<Instruction>(InstructionName # "_ir") ADDR:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c,
                 Ordering_not_atomic, scope_leaf, space_leaf)>;

      // $b register, $c immediate.
      def : Pat<(t.Ty (intrinsic addr:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c))),
                (!cast<Instruction>(InstructionName # "_ri") ADDR:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c),
                 Ordering_not_atomic, scope_leaf, space_leaf)>;

      // $b immediate, $c immediate.
      def : Pat<(t.Ty (intrinsic addr:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c))),
                (!cast<Instruction>(InstructionName # "_ii") ADDR:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c),
                 Ordering_not_atomic, scope_leaf, space_leaf)>;
    }
  }
}
@@ -2094,9 +2089,9 @@ multiclass ATOM2_incdec_impl<string OpStr> {
20942089

20952090
// atom.cas
20962091
multiclass ATOM3_cas_impl<string OpStr> {
2097-
defm _b16 : ATOM3S_impl<OpStr, "i", "b16", I16RT, []>;
2098-
defm _b32 : ATOM3S_impl<OpStr, "i", "b32", I32RT, []>;
2099-
defm _b64 : ATOM3S_impl<OpStr, "i", "b64", I64RT, []>;
2092+
defm _b16 : F_ATOMIC_3_INTRINSIC_PATTERN<I16RT, OpStr, "INT_PTX_ATOM_CAS_16", "i">;
2093+
defm _b32 : F_ATOMIC_3_INTRINSIC_PATTERN<I32RT, OpStr, "INT_PTX_ATOM_CAS_32", "i">;
2094+
defm _b64 : F_ATOMIC_3_INTRINSIC_PATTERN<I64RT, OpStr, "INT_PTX_ATOM_CAS_64", "i">;
21002095
}
21012096

21022097
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;

0 commit comments

Comments
 (0)