@@ -2335,7 +2335,82 @@ multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
23352335 foreach space = ["gen", "global", "shared"] in {
23362336 defm _#sem#scope#space :
23372337 ATOM2N_impl<OpStr, IntTypeStr, TypeStr, scope, space, sem,
2338- t, !listconcat(Preds, [hasAtomScope, hasAtomSemantics])>;
2338+ t, !listconcat(Preds, [hasAtomScope, hasMemoryOrdering])>;
2339+ }
2340+ }
2341+ }
2342+ }
2343+
2344+ // Helper for 3-operand atomic intrinsics (e.g. CAS): emits the four selection patterns (_rr/_ir/_ri/_ii) that map one intrinsic onto InstructionName, passing ordering, scope and address-space operands derived from SemStr, ScopeStr and SpaceStr.
2345+ multiclass ATOM3_INTRINSIC_PATTERN<RegTyInfo t, string OpStr, string IntTypeStr,
2346+ string InstructionName,
2347+ string ScopeStr, string SpaceStr, string SemStr,
2348+ list<Predicate> Preds> {
2349+ defvar intrinsic = !cast<Intrinsic>(
2350+ "int_nvvm_atomic_" # OpStr
2351+ # "_" # SpaceStr # "_" # IntTypeStr
2352+ # !if(!empty(SemStr), "", "_" # SemStr) // semantics suffix is omitted when SemStr is empty
2353+ # !if(!eq(ScopeStr, "gpu"), "", "_" # ScopeStr)); // scope suffix is omitted for "gpu"
2354+
2355+ defvar ordering = !cond(
2356+ !eq(SemStr, "acquire"): Ordering_acquire,
2357+ !eq(SemStr, "release"): Ordering_release,
2358+ !eq(SemStr, "acq_rel"): Ordering_acquire_release,
2359+ true: Ordering_not_atomic); // any other SemStr, including "", selects Ordering_not_atomic
2360+
2361+ defvar scope_pat = !cond(
2362+ !eq(ScopeStr, "gpu"): Scope_device,
2363+ !eq(ScopeStr, "cta"): Scope_cta,
2364+ !eq(ScopeStr, "sys"): Scope_sys,
2365+ true: Scope_device); // unrecognized scope strings fall back to Scope_device
2366+
2367+ defvar space_pat = !cond(
2368+ !eq(SpaceStr, "gen"): AddrSpace_gen,
2369+ !eq(SpaceStr, "global"): AddrSpace_global,
2370+ !eq(SpaceStr, "shared"): AddrSpace_shared,
2371+ true: AddrSpace_gen); // unrecognized space strings fall back to AddrSpace_gen
2372+
2373+ let Predicates = Preds in { // all four patterns below are gated on Preds
2374+ def : Pat<(t.Ty (intrinsic addr:$addr, t.Ty:$b, t.Ty:$c)),
2375+ (!cast<Instruction>(InstructionName # "_rr") ADDR:$addr, t.Ty:$b, t.Ty:$c, ordering, scope_pat, space_pat)>; // neither $b nor $c matched as t.ImmNode
2376+
2377+ def : Pat<(t.Ty (intrinsic addr:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c)),
2378+ (!cast<Instruction>(InstructionName # "_ir") ADDR:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c, ordering, scope_pat, space_pat)>; // only $b matched as t.ImmNode
2379+
2380+ def : Pat<(t.Ty (intrinsic addr:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c))),
2381+ (!cast<Instruction>(InstructionName # "_ri") ADDR:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c), ordering, scope_pat, space_pat)>; // only $c matched as t.ImmNode
2382+
2383+ def : Pat<(t.Ty (intrinsic addr:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c))),
2384+ (!cast<Instruction>(InstructionName # "_ii") ADDR:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c), ordering, scope_pat, space_pat)>; // both $b and $c matched as t.ImmNode
2385+ }
2386+ }
2387+
2388+ multiclass ATOM3S_impl<string OpStr, string IntTypeStr,
2389+ string InstructionName, RegTyInfo t, list<Predicate> Preds> {
2390+ // Similar to ATOM2S_impl but for 3-operand atomics like CAS
2391+ foreach scope = ["cta", "sys"] in {
2392+ foreach space = ["gen", "global", "shared"] in {
2393+ defm _#scope#space : ATOM3_INTRINSIC_PATTERN<t, OpStr, IntTypeStr,
2394+ InstructionName, scope, space, "",
2395+ !listconcat(Preds, [hasAtomScope])>;
2396+ }
2397+ }
2398+
2399+ foreach scope = ["gpu"] in {
2400+ foreach space = ["gen", "global", "shared"] in {
2401+ defm _#scope#space : ATOM3_INTRINSIC_PATTERN<t, OpStr, IntTypeStr,
2402+ InstructionName, scope, space, "", Preds>;
2403+ }
2404+ }
2405+
2406+ // Intrinsics with semantics
2407+ foreach sem = ["acquire", "release", "acq_rel"] in {
2408+ foreach scope = ["gpu", "cta", "sys"] in {
2409+ foreach space = ["gen", "global", "shared"] in {
2410+ defm _#sem#scope#space :
2411+ ATOM3_INTRINSIC_PATTERN<t, OpStr, IntTypeStr,
2412+ InstructionName, scope, space, sem,
2413+ !listconcat(Preds, [hasAtomScope, hasMemoryOrdering])>;
23392414 }
23402415 }
23412416 }
@@ -2401,12 +2476,9 @@ multiclass ATOM2_incdec_impl<string OpStr> {
24012476
24022477// atom.cas
24032478multiclass ATOM3_cas_impl<string OpStr> { // instantiates ATOM3S_impl once per CAS instruction (16/32/64), forwarding OpStr
2404- defm _b16 : F_ATOMIC_3_INTRINSIC_PATTERN<I16RT, OpStr, "INT_PTX_ATOM_CAS_16">;
2405- defm _b32 : F_ATOMIC_3_INTRINSIC_PATTERN<I32RT, OpStr, "INT_PTX_ATOM_CAS_32">;
2406- defm _b64 : F_ATOMIC_3_INTRINSIC_PATTERN<I64RT, OpStr, "INT_PTX_ATOM_CAS_64">;
2407- // TODO: rewrite these two too:
2408- // defm _f32 : ATOM3S_impl<OpStr, "f", "b32", F32RT, []>;
2409- // defm _f64 : ATOM3S_impl<OpStr, "f", "b64", F64RT, []>;
2479+ defm _b16 : ATOM3S_impl<OpStr, "i", "INT_PTX_ATOM_CAS_16", I16RT, []>; // IntTypeStr "i", I16RT, no extra predicates
2480+ defm _b32 : ATOM3S_impl<OpStr, "i", "INT_PTX_ATOM_CAS_32", I32RT, []>; // IntTypeStr "i", I32RT, no extra predicates
2481+ defm _b64 : ATOM3S_impl<OpStr, "i", "INT_PTX_ATOM_CAS_64", I64RT, []>; // IntTypeStr "i", I64RT, no extra predicates
24102482}
24112483
24122484defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
@@ -2472,9 +2544,9 @@ multiclass ATOM_LdA_impl<string IntTypeStr, string TypeStr,
24722544 string ScopeStr, ValueType regT, NVPTXRegClass regclass,
24732545 list<Predicate> Preds> {
24742546 defm _relaxed_ : ATOM_LdN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "relaxed",
2475- regT, regclass, !listconcat(Preds,[hasAtomSemantics ])>;
2547+ regT, regclass, !listconcat(Preds,[hasMemoryOrdering ])>;
24762548 defm _acquire_ : ATOM_LdN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "acquire",
2477- regT, regclass, !listconcat(Preds,[hasAtomSemantics ])>;
2549+ regT, regclass, !listconcat(Preds,[hasMemoryOrdering ])>;
24782550}
24792551
24802552// Constructs variants for different scopes of atomic op.
@@ -2557,9 +2629,9 @@ multiclass ATOM_StA_impl<string IntTypeStr, string TypeStr,
25572629 Operand ImmType, SDNode Imm, ValueType ImmTy,
25582630 list<Predicate> Preds> {
25592631 defm _relaxed_ : ATOM_StN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "relaxed",
2560- regT, regclass, ImmType, Imm, ImmTy, !listconcat(Preds,[hasAtomSemantics ])>;
2632+ regT, regclass, ImmType, Imm, ImmTy, !listconcat(Preds,[hasMemoryOrdering ])>;
25612633 defm _release_ : ATOM_StN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "release",
2562- regT, regclass, ImmType, Imm, ImmTy, !listconcat(Preds,[hasAtomSemantics ])>;
2634+ regT, regclass, ImmType, Imm, ImmTy, !listconcat(Preds,[hasMemoryOrdering ])>;
25632635}
25642636
25652637multiclass ATOM_StS_impl<string IntTypeStr, string TypeStr,
0 commit comments