@@ -2011,41 +2011,41 @@ multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceSt
20112011
20122012// has 3 operands
20132013multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
2014- ValueType regT, NVPTXRegClass regclass,
2014+ ValueType regT, NVPTXRegClass regclass, string SemStr,
20152015 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
20162016 Operand IMMType, list<Predicate> Pred> {
20172017 let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
20182018 def reg : NVPTXInst<(outs regclass:$dst),
20192019 (ins ptrclass:$addr, regclass:$b, regclass:$c),
2020- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
2020+ !strconcat("atom", SemStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
20212021 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
20222022 Requires<Pred>;
20232023
20242024 def imm1 : NVPTXInst<(outs regclass:$dst),
20252025 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
2026- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
2026+ !strconcat("atom", SemStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
20272027 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
20282028 Requires<Pred>;
20292029
20302030 def imm2 : NVPTXInst<(outs regclass:$dst),
20312031 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
2032- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
2032+ !strconcat("atom", SemStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
20332033 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
20342034 Requires<Pred>;
20352035
20362036 def imm3 : NVPTXInst<(outs regclass:$dst),
20372037 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
2038- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
2038+ !strconcat("atom", SemStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
20392039 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
20402040 Requires<Pred>;
20412041 }
20422042}
2043- multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr , string TypeStr ,
2044- string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
2045- defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr ,
2046- IntOp, IMMType, Pred>;
2047- defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr ,
2048- IntOp, IMMType, Pred>;
2043+ multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SemStr , string SpaceStr ,
2044+ string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
2045+ defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SemStr, SpaceStr, TypeStr ,
2046+ OpcStr, IntOp, IMMType, Pred>;
2047+ defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SemStr, SpaceStr, TypeStr ,
2048+ OpcStr, IntOp, IMMType, Pred>;
20492049}
20502050
20512051// atom_add
@@ -2427,51 +2427,76 @@ defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
24272427defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
24282428 ".xor", atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
24292429
2430- // atom_cas
2431-
2432- def atomic_cmp_swap_i16_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
2433- (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
2434- def atomic_cmp_swap_i16_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
2435- (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
2436- def atomic_cmp_swap_i16_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
2437- (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
2438- def atomic_cmp_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
2439- (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2440- def atomic_cmp_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
2441- (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2442- def atomic_cmp_swap_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
2443- (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2444- def atomic_cmp_swap_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
2445- (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
2446- def atomic_cmp_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
2447- (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
2448- def atomic_cmp_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
2449- (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
2450-
2451- defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas",
2452- atomic_cmp_swap_i16_g, i16imm, [hasSM<70>, hasPTX<63>]>;
2453- defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, ".shared", ".b16", ".cas",
2454- atomic_cmp_swap_i16_s, i16imm, [hasSM<70>, hasPTX<63>]>;
2455- defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", ".b16", ".cas",
2456- atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>;
2457- defm INT_PTX_ATOM_CAS_GEN_16_USE_G : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas",
2458- atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>;
2459- defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
2460- atomic_cmp_swap_i32_g, i32imm>;
2461- defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
2462- atomic_cmp_swap_i32_s, i32imm>;
2463- defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
2464- atomic_cmp_swap_i32_gen, i32imm>;
2465- defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
2466- ".cas", atomic_cmp_swap_i32_gen, i32imm>;
2467- defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
2468- atomic_cmp_swap_i64_g, i64imm>;
2469- defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
2470- atomic_cmp_swap_i64_s, i64imm>;
2471- defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
2472- atomic_cmp_swap_i64_gen, i64imm>;
2473- defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
2474- ".cas", atomic_cmp_swap_i64_gen, i64imm>;
2430+ multiclass ternary_atomic_op_as {
2431+ // one record per address space
2432+ def NAME#_generic: PatFrag<(ops node:$ptr, node:$cmp, node:$val),
2433+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val),
2434+ AS_match.generic>;
2435+
2436+ def NAME#_global: PatFrag<(ops node:$ptr, node:$cmp, node:$val),
2437+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val),
2438+ AS_match.global>;
2439+
2440+ def NAME#_shared: PatFrag<(ops node:$ptr, node:$cmp, node:$val),
2441+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val),
2442+ AS_match.shared>;
2443+ }
2444+
2445+ // Generate pattern fragments for every combination of size and memory order.
2446+ // NOTE: i8 cmpxchg is not supported in PTX; AtomicExpandPass emulates all i8 cmpxchgs
2447+ // using a larger-bitwidth CAS.
2448+ foreach size = ["i16", "i32", "i64"] in {
2449+ foreach order = ["", "_monotonic", "_acquire", "_release", "_acq_rel", "_seq_cst"] in {
2450+ defm atomic_cmp_swap#_#size#order: ternary_atomic_op_as;
2451+ }
2452+ }
2453+
2454+ // E.g. with atomic_cmp_swap_pat = "atomic_cmp_swap_i32_acquire_global",
2455+ // type = "32", order = ".acquire", addrspace = ".global" and
2456+ // preds = [hasSM<70>, hasPTX<63>], this is equivalent to:
2457+ // F_ATOMIC_3<i32, Int32Regs, ".acquire", ".global", ".b32",
2458+ // ".cas", atomic_cmp_swap_i32_acquire_global, i32imm,
2459+ // [hasSM<70>, hasPTX<63>]>
2460+ multiclass INT_PTX_ATOM_CAS<string atomic_cmp_swap_pat, string type,
2461+ string order, string addrspace, list<Predicate> preds>
2462+ : F_ATOMIC_3<!cast<ValueType>("i"#type),
2463+ !cast<NVPTXRegClass>("Int"#type#"Regs"),
2464+ order,
2465+ addrspace,
2466+ ".b"#type,
2467+ ".cas",
2468+ !cast<PatFrag>(atomic_cmp_swap_pat),
2469+ !cast<Operand>("i"#type#"imm"),
2470+ preds>;
2471+
2472+ // Define atom.cas for all combinations of size x addrspace x memory order
2473+ // supported in PTX *and* on the hardware.
2474+ foreach size = ["32", "64"] in {
2475+ foreach addrspace = ["generic", "global", "shared"] in {
2476+ defvar cas_addrspace_string = !if(!eq(addrspace, "generic"), "", "."#addrspace);
2477+ foreach order = ["acquire", "release", "acq_rel", "monotonic"] in {
2478+ defvar cas_order_string = !if(!eq(order, "monotonic"), ".relaxed", "."#order);
2479+ // Note that AtomicExpand will convert cmpxchg seq_cst to a cmpxchg monotonic with fences around it.
2480+ // Memory orders are only supported for SM70+ and PTX63+, so we have two sets of instruction definitions:
2481+ // one for SM70+, and "old" ones, which lower to "atom.cas" without a memory-order qualifier, for earlier archs.
2482+ defm INT_PTX_ATOM_CAS_#size#_#order#addrspace
2483+ : INT_PTX_ATOM_CAS<"atomic_cmp_swap_i"#size#_#order#_#addrspace, size,
2484+ cas_order_string, cas_addrspace_string,
2485+ [hasSM<70>, hasPTX<63>]>;
2486+ defm INT_PTX_ATOM_CAS_#size#_#order#_old#addrspace
2487+ : INT_PTX_ATOM_CAS<"atomic_cmp_swap_i"#size#_#order#_#addrspace, size,
2488+ "", cas_addrspace_string, []>;
2489+ }
2490+ }
2491+ }
2492+
2493+ // Note that 16-bit CAS support in PTX is emulated.
2494+ defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, "", ".global", ".b16", ".cas",
2495+ atomic_cmp_swap_i16_global, i16imm, [hasSM<70>, hasPTX<63>]>;
2496+ defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, "", ".shared", ".b16", ".cas",
2497+ atomic_cmp_swap_i16_shared, i16imm, [hasSM<70>, hasPTX<63>]>;
2498+ defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", "", ".b16", ".cas",
2499+ atomic_cmp_swap_i16_generic, i16imm, [hasSM<70>, hasPTX<63>]>;
24752500
24762501// Support for scoped atomic operations. Matches
24772502// int_nvvm_atomic_{op}_{space}_{type}_{scope}
0 commit comments