@@ -41,6 +41,45 @@ def AS_match {
4141 }];
4242}
4343
44+
45+ //===----------------------------------------------------------------------===//
46+ // NVPTX Scope Constants
47+ // These map to the Scope enum in NVPTX.h
48+ //===----------------------------------------------------------------------===//
49+
50+ def Scope_thread : PatLeaf<(i32 0)>; // Thread = 0
51+ def Scope_cta : PatLeaf<(i32 1)>; // Block = 1
52+ def Scope_cluster : PatLeaf<(i32 2)>; // Cluster = 2
53+ def Scope_device : PatLeaf<(i32 3)>; // Device = 3
54+ def Scope_sys : PatLeaf<(i32 4)>; // System = 4
55+
56+ //===----------------------------------------------------------------------===//
57+ // NVPTX Address Space Constants
58+ // These map to the AddressSpace enum in NVPTX.h
59+ //===----------------------------------------------------------------------===//
60+
61+ def AddrSpace_gen : PatLeaf<(i32 0)>; // Generic = 0
62+ def AddrSpace_global : PatLeaf<(i32 1)>; // Global = 1
63+ def AddrSpace_shared : PatLeaf<(i32 3)>; // Shared = 3
64+ def AddrSpace_const : PatLeaf<(i32 4)>; // Const = 4
65+ def AddrSpace_local : PatLeaf<(i32 5)>; // Local = 5
66+ def AddrSpace_shared_cluster : PatLeaf<(i32 7)>; // SharedCluster = 7
67+ def AddrSpace_param : PatLeaf<(i32 101)>; // Param = 101
68+
69+ //===----------------------------------------------------------------------===//
70+ // NVPTX Ordering Constants
71+ // These map to the Ordering enum in NVPTX.h
72+ //===----------------------------------------------------------------------===//
73+
74+ def Ordering_not_atomic : PatLeaf<(i32 0)>; // NotAtomic = 0
75+ def Ordering_relaxed : PatLeaf<(i32 2)>; // Relaxed = 2
76+ def Ordering_acquire : PatLeaf<(i32 4)>; // Acquire = 4
77+ def Ordering_release : PatLeaf<(i32 5)>; // Release = 5
78+ def Ordering_acquire_release : PatLeaf<(i32 6)>; // AcquireRelease = 6
79+ def Ordering_sequentially_consistent : PatLeaf<(i32 7)>; // SequentiallyConsistent = 7
80+ def Ordering_volatile : PatLeaf<(i32 8)>; // Volatile = 8
81+ def Ordering_relaxed_mmio : PatLeaf<(i32 9)>; // RelaxedMMIO = 9
82+
4483multiclass nvvm_ternary_atomic_op_scoped<SDPatternOperator frag> {
4584 defvar frag_pat = (frag node:$ptr, node:$cmp, node:$val);
4685 def NAME#_cta: PatFrag<!setdagop(frag_pat, ops),
@@ -1842,60 +1881,29 @@ multiclass F_ATOMIC_2<RegTyInfo t, string sem_str, string as_str, string op_str,
18421881 }
18431882}
18441883
1845- // has 3 operands
1846- multiclass F_ATOMIC_3<RegTyInfo t, string sem_str, string as_str, string op_str,
1847- SDPatternOperator op, list<Predicate> preds> {
1848- defvar asm_str = "atom" # sem_str # as_str # "." # op_str;
1849- let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
1850- def rr : BasicNVPTXInst<(outs t.RC:$dst),
1851- (ins ADDR:$addr, t.RC:$b, t.RC:$c),
1852- asm_str,
1853- [(set t.Ty:$dst, (op addr:$addr, t.Ty:$b, t.Ty:$c))]>,
1854- Requires<preds>;
1855-
1856- def ir : BasicNVPTXInst<(outs t.RC:$dst),
1857- (ins ADDR:$addr, t.Imm:$b, t.RC:$c),
1858- asm_str,
1859- [(set t.Ty:$dst, (op addr:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c))]>,
1860- Requires<preds>;
1861-
1862- def ri : BasicNVPTXInst<(outs t.RC:$dst),
1863- (ins ADDR:$addr, t.RC:$b, t.Imm:$c),
1864- asm_str,
1865- [(set t.Ty:$dst, (op addr:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c)))]>,
1866- Requires<preds>;
1867-
1868- def ii : BasicNVPTXInst<(outs t.RC:$dst),
1869- (ins ADDR:$addr, t.Imm:$b, t.Imm:$c),
1870- asm_str,
1871- [(set t.Ty:$dst, (op addr:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c)))]>,
1872- Requires<preds>;
1873- }
1874- }
1875-
1876- multiclass F_ATOMIC_3_MANYOPERAND<RegTyInfo t, string op_str> {
1877- defvar asm_str = "atom${sem:sem}${scope:scope}${addsp:addsp}" # op_str # " \t$dst, [$addr], $b, $c;";
1884+ multiclass F_ATOMIC_3<RegTyInfo t, string op_str> { // Three-operand atomics; sem/scope/addsp are carried as AtomicCode operands on one opcode per operand form.
1885+ defvar asm_str = "atom${sem:sem}${scope:scope}${addsp:addsp}" # op_str; // NOTE(review): the old string ended in " \t$dst, [$addr], $b, $c;" - confirm BasicNVPTXInst supplies the operand list.
18781886
18791887 let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
1880- def _rr : NVPTXInst <(outs t.RC:$dst),
1888+ def _rr : BasicNVPTXInst <(outs t.RC:$dst),
18811889 (ins ADDR:$addr, t.RC:$b, t.RC:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
18821890 asm_str, []>;
18831891
1884- def _ir : NVPTXInst <(outs t.RC:$dst),
1892+ def _ir : BasicNVPTXInst <(outs t.RC:$dst),
18851893 (ins ADDR:$addr, t.Imm:$b, t.RC:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
18861894 asm_str, []>;
18871895
1888- def _ri : NVPTXInst <(outs t.RC:$dst),
1896+ def _ri : BasicNVPTXInst <(outs t.RC:$dst),
18891897 (ins ADDR:$addr, t.RC:$b, t.Imm:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
18901898 asm_str, []>;
18911899
1892- def _ii : NVPTXInst <(outs t.RC:$dst),
1900+ def _ii : BasicNVPTXInst <(outs t.RC:$dst),
18931901 (ins ADDR:$addr, t.Imm:$b, t.Imm:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
18941902 asm_str, []>;
18951903 }
18961904}
18971905
1898- multiclass F_ATOMIC_3_MANYOPERAND_PATTERN <RegTyInfo t, string InstructionName, SDPatternOperator op, SDNode atomic> {
1906+ multiclass F_ATOMIC_3_PATTERN <RegTyInfo t, string InstructionName, SDPatternOperator op, SDNode atomic> {
18991907 defvar GetSem = SDNodeXForm<atomic, [{
19001908 return getI32Imm(getMemOrder(cast<MemSDNode>(N)), SDLoc(N));
19011909 }]>;
@@ -1929,14 +1937,6 @@ multiclass F_ATOMIC_2_AS<RegTyInfo t, SDPatternOperator frag, string op_str, lis
19291937 defm _GEN : F_ATOMIC_2<t, "", "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
19301938}
19311939
1932- multiclass F_ATOMIC_3_AS<RegTyInfo t, SDPatternOperator frag, string scope_str, string sem_str, string op_str, list<Predicate> preds = []> {
1933- defvar frag_pat = (frag node:$a, node:$b, node:$c);
1934- defm _G : F_ATOMIC_3<t, sem_str, scope_str, ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
1935- defm _S : F_ATOMIC_3<t, sem_str, scope_str, ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
1936- defm _S_C : F_ATOMIC_3<t, sem_str, scope_str, ".shared::cluster", op_str, ATOMIC_SHARED_CLUSTER_CHK<frag_pat>, !listconcat([hasClusters], preds)>;
1937- defm _GEN : F_ATOMIC_3<t, sem_str, scope_str, "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
1938- }
1939-
19401940// atom_add
19411941defm INT_PTX_ATOM_ADD_32 : F_ATOMIC_2_AS<I32RT, atomic_load_add_i32, "add.u32">;
19421942defm INT_PTX_ATOM_ADD_64 : F_ATOMIC_2_AS<I64RT, atomic_load_add_i64, "add.u64">;
@@ -1981,17 +1981,14 @@ defm INT_PTX_ATOM_XOR_64 : F_ATOMIC_2_AS<I64RT, atomic_load_xor_i64, "xor.b64",
19811981
19821982// Define atom.cas for all combinations of size x addrspace x memory order
19831983// supported in PTX *and* on the hardware.
1984- foreach t = [I32RT, I64RT] in {
1984+ foreach t = [I16RT, I32RT, I64RT] in { // NOTE(review): the removed INT_PTX_ATOM_CAS_16 def required [hasSM<70>, hasPTX<63>]; confirm the 16-bit case is still guarded on this path.
19851985 defvar atomic_cmp_swap_pat = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size);
19861986 defm INT_PTX_ATOM_CAS_#t.Size
1987- : F_ATOMIC_3_MANYOPERAND <t, ".cas.b"#t.Size>;
1987+ : F_ATOMIC_3 <t, ".cas.b"#t.Size>;
19881988
1989- defm INT_PTX_ATOM_CAS_PAT_#t.Size : F_ATOMIC_3_MANYOPERAND_PATTERN <t, "INT_PTX_ATOM_CAS_"#t.Size, atomic_cmp_swap_pat, atomic_cmp_swap>;
1989+ defm INT_PTX_ATOM_CAS_PAT_#t.Size : F_ATOMIC_3_PATTERN <t, "INT_PTX_ATOM_CAS_"#t.Size, atomic_cmp_swap_pat, atomic_cmp_swap>;
19901990}
19911991
1992- // Note that 16-bit CAS support in PTX is emulated.
1993- defm INT_PTX_ATOM_CAS_16 : F_ATOMIC_3_AS<I16RT, atomic_cmp_swap_i16, "", "", "cas.b16", [hasSM<70>, hasPTX<63>]>;
1994-
19951992// Support for scoped atomic operations. Matches
19961993// int_nvvm_atomic_{op}_{space}_{type}_{scope}
19971994// and converts it into the appropriate instruction.
@@ -2015,20 +2012,6 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
20152012 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
20162013 preds = Preds>;
20172014}
2018- multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
2019- string ScopeStr, string SpaceStr,
2020- RegTyInfo t, list<Predicate> Preds> {
2021- defm "" : F_ATOMIC_3<t,
2022- as_str = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr),
2023- sem_str = "",
2024- scope_str = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr),
2025- op_str = OpStr # "." # TypeStr,
2026- op = !cast<Intrinsic>(
2027- "int_nvvm_atomic_" # OpStr
2028- # "_" # SpaceStr # "_" # IntTypeStr
2029- # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2030- preds = Preds>;
2031- }
20322015
20332016// Constructs variants for different scopes of atomic op.
20342017multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
@@ -2043,15 +2026,27 @@ multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
20432026 }
20442027 }
20452028}
2046- multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
2047- RegTyInfo t, list<Predicate> Preds> {
2048- // No need to define ".gpu"-scoped atomics. They do the same thing
2049- // as the regular, non-scoped atomics defined elsewhere.
2029+
2030+ multiclass F_ATOMIC_3_INTRINSIC_PATTERN<RegTyInfo t, string OpStr, string InstructionName, string IntTypeStr> { // Maps each scoped int_nvvm_atomic_<op>_<space>_<type>_<scope> intrinsic onto the _rr/_ir/_ri/_ii forms of InstructionName.
20502031 foreach scope = ["cta", "sys"] in {
2051- // For now we only need variants for generic space pointers.
20522032 foreach space = ["gen"] in {
2053- defm _#scope#space : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, scope, space,
2054- t, !listconcat(Preds, [hasAtomScope])>;
2037+ defvar intrinsic = !cast<SDPatternOperator>("int_nvvm_atomic_" # OpStr # "_" # space # "_" # IntTypeStr # "_" # scope);
2038+ def : Pat<(t.Ty (intrinsic addr:$addr, t.Ty:$b, t.Ty:$c)),
2039+ (!cast<Instruction>(InstructionName # "_rr") ADDR:$addr, t.Ty:$b, t.Ty:$c, Ordering_not_atomic, !cast<PatLeaf>("Scope_" # scope), !cast<PatLeaf>("AddrSpace_" # space))>;
2040+
2041+ def : Pat<(t.Ty (intrinsic addr:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c)),
2042+ (!cast<Instruction>(InstructionName # "_ir") ADDR:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c, Ordering_not_atomic, !cast<PatLeaf>("Scope_" # scope), !cast<PatLeaf>("AddrSpace_" # space))>;
2043+
2044+ def : Pat<(t.Ty (intrinsic addr:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c))),
2045+ (!cast<Instruction>(InstructionName # "_ri") ADDR:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c), Ordering_not_atomic, !cast<PatLeaf>("Scope_" # scope), !cast<PatLeaf>("AddrSpace_" # space))>;
2046+
2047+ def : Pat<(t.Ty (intrinsic addr:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c))),
2048+ (!cast<Instruction>(InstructionName # "_ii") ADDR:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c), Ordering_not_atomic, !cast<PatLeaf>("Scope_" # scope), !cast<PatLeaf>("AddrSpace_" # space))>;
20552050 }
20562051 }
20572052}
@@ -2094,9 +2089,9 @@ multiclass ATOM2_incdec_impl<string OpStr> {
20942089
20952090// atom.cas
20962091multiclass ATOM3_cas_impl<string OpStr> {
2097- defm _b16 : ATOM3S_impl< OpStr, "i ", "b16", I16RT, [] >;
2098- defm _b32 : ATOM3S_impl< OpStr, "i ", "b32", I32RT, [] >;
2099- defm _b64 : ATOM3S_impl< OpStr, "i ", "b64", I64RT, [] >;
2092+ defm _b16 : F_ATOMIC_3_INTRINSIC_PATTERN<I16RT, OpStr, "INT_PTX_ATOM_CAS_16", "i" >; // Name must match the INT_PTX_ATOM_CAS_<size> defm exactly; "_rr"/"_ir"/"_ri"/"_ii" is appended to it.
2093+ defm _b32 : F_ATOMIC_3_INTRINSIC_PATTERN<I32RT, OpStr, "INT_PTX_ATOM_CAS_32", "i" >;
2094+ defm _b64 : F_ATOMIC_3_INTRINSIC_PATTERN<I64RT, OpStr, "INT_PTX_ATOM_CAS_64", "i" >;
21002095}
21012096
21022097defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
0 commit comments