@@ -1873,6 +1873,54 @@ multiclass F_ATOMIC_3<RegTyInfo t, string sem_str, string as_str, string op_str,
18731873 }
18741874}
18751875
1876+ multiclass F_ATOMIC_3_MANYOPERAND<RegTyInfo t, string op_str> { // Three-input atomic ($addr, $b, $c) defined in four variants: _rr, _ir, _ri, _ii.
1877+ defvar asm_str = "atom${sem:sem}${scope:scope}${addsp:addsp}" # op_str # " \t$dst, [$addr], $b, $c;";
1878+ // asm_str is shared by all variants: "atom", then the $sem, $scope and $addsp operands (each printed via its same-named modifier), then op_str and the operand list.
1879+ let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { // Flags applied to every variant below; each def has an empty pattern list ([]).
1880+ def _rr : NVPTXInst<(outs t.RC:$dst),
1881+ (ins ADDR:$addr, t.RC:$b, t.RC:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
1882+ asm_str, []>;
1883+ // _ir: same operand list as _rr, except $b is an immediate (t.Imm) instead of a register (t.RC).
1884+ def _ir : NVPTXInst<(outs t.RC:$dst),
1885+ (ins ADDR:$addr, t.Imm:$b, t.RC:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
1886+ asm_str, []>;
1887+ // _ri: $b is a register, $c is an immediate.
1888+ def _ri : NVPTXInst<(outs t.RC:$dst),
1889+ (ins ADDR:$addr, t.RC:$b, t.Imm:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
1890+ asm_str, []>;
1891+ // _ii: both $b and $c are immediates.
1892+ def _ii : NVPTXInst<(outs t.RC:$dst),
1893+ (ins ADDR:$addr, t.Imm:$b, t.Imm:$c, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp),
1894+ asm_str, []>;
1895+ }
1896+ }
1897+
1898+ multiclass F_ATOMIC_3_MANYOPERAND_PATTERN<RegTyInfo t, string InstructionName, SDPatternOperator op, SDNode atomic> { // Selection patterns targeting the instructions named InstructionName#{_rr,_ir,_ri,_ii}.
1899+ defvar GetSem = SDNodeXForm<atomic, [{
1900+ return getI32Imm(getMemOrder(cast<MemSDNode>(N)), SDLoc(N));
1901+ }]>; // GetSem: i32 immediate built from getMemOrder() of the matched node, cast to MemSDNode.
1902+ // GetScope: i32 immediate built from getAtomicScope() of the matched node.
1903+ defvar GetScope = SDNodeXForm<atomic, [{
1904+ return getI32Imm(getAtomicScope(cast<MemSDNode>(N)), SDLoc(N));
1905+ }]>;
1906+ // GetAddSp: i32 immediate built from getAddrSpace() of the matched node.
1907+ defvar GetAddSp = SDNodeXForm<atomic, [{
1908+ return getI32Imm(getAddrSpace(cast<MemSDNode>(N)), SDLoc(N));
1909+ }]>;
1910+ // Each pattern binds the matched node as $this so the three xforms above can be applied to it and passed as the trailing operands. Register/register form:
1911+ def : Pat<(op:$this addr:$addr, t.Ty:$b, t.Ty:$c),
1912+ (!cast<Instruction>(InstructionName#_rr) ADDR:$addr, t.Ty:$b, t.Ty:$c, (GetSem $this), (GetScope $this), (GetAddSp $this))>;
1913+ // $b matched as t.ImmNode, $c as a plain value.
1914+ def : Pat<(op:$this addr:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c),
1915+ (!cast<Instruction>(InstructionName#_ir) ADDR:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c, (GetSem $this), (GetScope $this), (GetAddSp $this))>;
1916+ // $b matched as a plain value, $c as t.ImmNode.
1917+ def : Pat<(op:$this addr:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c)),
1918+ (!cast<Instruction>(InstructionName#_ri) ADDR:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c), (GetSem $this), (GetScope $this), (GetAddSp $this))>;
1919+ // Both $b and $c matched as t.ImmNode.
1920+ def : Pat<(op:$this addr:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c)),
1921+ (!cast<Instruction>(InstructionName#_ii) ADDR:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c), (GetSem $this), (GetScope $this), (GetAddSp $this))>;
1922+ }
1923+
18761924multiclass F_ATOMIC_2_AS<RegTyInfo t, SDPatternOperator frag, string op_str, list<Predicate> preds = []> {
18771925 defvar frag_pat = (frag node:$a, node:$b);
18781926 defm _G : F_ATOMIC_2<t, "", ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
@@ -1934,29 +1982,11 @@ defm INT_PTX_ATOM_XOR_64 : F_ATOMIC_2_AS<I64RT, atomic_load_xor_i64, "xor.b64",
19341982// Define atom.cas for all combinations of size x addrspace x memory order
19351983// supported in PTX *and* on the hardware.
19361984foreach t = [I32RT, I64RT] in {
1937- foreach order = ["acquire", "release", "acq_rel", "monotonic"] in {
1938- defvar cas_order_string = !if(!eq(order, "monotonic"), ".relaxed", "."#order);
1939- defvar atomic_cmp_swap_pat = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size#_#order);
1940-
1941- // Instantiate scoped versions of the atomic compare and swap pattern
1942- defm atomic_cmp_swap_i#t.Size#_#order: nvvm_ternary_atomic_op_scoped<atomic_cmp_swap_pat>;
1943-
1944- foreach scope = ["cta", "cluster", "gpu", "sys"] in {
1945- defvar atomic_cmp_swap_pat_scoped = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size#_#order#_#scope);
1985+ defvar atomic_cmp_swap_pat = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size); // One PatFrag per t, looked up by name from t.Size; no per-order or per-scope fragments are used here.
1986+ defm INT_PTX_ATOM_CAS_#t.Size
1987+ : F_ATOMIC_3_MANYOPERAND<t, ".cas.b"#t.Size>; // Defines INT_PTX_ATOM_CAS_<Size>{_rr,_ir,_ri,_ii}; $sem, $scope and $addsp are instruction operands.
19461988
1947- // Syncscope is only supported for SM70+
1948- defm INT_PTX_ATOM_CAS_#t.Size#_#order#_#scope
1949- : F_ATOMIC_3_AS<t, atomic_cmp_swap_pat_scoped, "."#scope, cas_order_string, "cas.b"#t.Size, [hasSM<70>, hasPTX<63>]>;
1950- }
1951-
1952- // Note that AtomicExpand will convert cmpxchg seq_cst to a cmpxchg monotonic with fences around it.
1953- // Memory orders are only supported for SM70+, PTX63+- so we have two sets of instruction definitions-
1954- // for SM70+, and "old" ones which lower to "atom.cas", for earlier archs.
1955- defm INT_PTX_ATOM_CAS_#t.Size#_#order
1956- : F_ATOMIC_3_AS<t, atomic_cmp_swap_pat, "", cas_order_string, "cas.b"#t.Size, [hasSM<70>, hasPTX<63>]>;
1957- defm INT_PTX_ATOM_CAS_#t.Size#_#order#_old
1958- : F_ATOMIC_3_AS<t, atomic_cmp_swap_pat, "", "", "cas.b"#t.Size, []>;
1959- }
1989+ defm INT_PTX_ATOM_CAS_PAT_#t.Size : F_ATOMIC_3_MANYOPERAND_PATTERN<t, "INT_PTX_ATOM_CAS_"#t.Size, atomic_cmp_swap_pat, atomic_cmp_swap>; // The name string must equal the defm prefix above: the patterns !cast it with the _rr/_ir/_ri/_ii suffixes.
19601990}
19611991
19621992// Note that 16-bit CAS support in PTX is emulated.
0 commit comments