Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -2286,13 +2286,15 @@ class TargetLoweringBase {
/// standard ABI uses a fence before a seq_cst load instead of after a
/// seq_cst store).
/// @{
virtual Instruction *emitLeadingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const;

virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const;
virtual Instruction *
emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord,
SyncScope::ID SSID = SyncScope::System) const;

virtual Instruction *
emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord,
SyncScope::ID SSID = SyncScope::System) const;
/// @}

// Emits code that executes when the comparison result in the ll/sc
Expand Down
18 changes: 13 additions & 5 deletions llvm/lib/CodeGen/AtomicExpandPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class AtomicExpandImpl {
const DataLayout *DL = nullptr;

private:
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
SyncScope::ID SSID = SyncScope::System);
IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
bool tryExpandAtomicLoad(LoadInst *LI);
Expand Down Expand Up @@ -303,6 +304,7 @@ bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {

if (TLI->shouldInsertFencesForAtomic(I)) {
auto FenceOrdering = AtomicOrdering::Monotonic;
SyncScope::ID SSID = SyncScope::System;
if (LI && isAcquireOrStronger(LI->getOrdering())) {
FenceOrdering = LI->getOrdering();
LI->setOrdering(AtomicOrdering::Monotonic);
Expand All @@ -325,13 +327,18 @@ bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
// expandAtomicCmpXchg in that case.
FenceOrdering = CASI->getMergedOrdering();
auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI);
SSID = CASI->getSyncScopeID();

CASI->setSuccessOrdering(CASOrdering);
CASI->setFailureOrdering(CASOrdering);
// If CAS ordering is monotonic, then the operation will
// take default scope. Otherwise, it will retain its scope
if (CASOrdering != AtomicOrdering::Monotonic)
CASI->setSyncScopeID(SSID);
}

if (FenceOrdering != AtomicOrdering::Monotonic) {
MadeChange |= bracketInstWithFences(I, FenceOrdering);
MadeChange |= bracketInstWithFences(I, FenceOrdering, SSID);
}
} else if (I->hasAtomicStore() &&
TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
Expand Down Expand Up @@ -432,12 +439,13 @@ PreservedAnalyses AtomicExpandPass::run(Function &F,
}

bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
AtomicOrdering Order) {
AtomicOrdering Order,
SyncScope::ID SSID) {
ReplacementIRBuilder Builder(I, *DL);

auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order, SSID);

auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order, SSID);
// We have a guard here because not every atomic operation generates a
// trailing fence.
if (TrailingFence)
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2320,18 +2320,20 @@ TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI,

/// Default leading-fence hook.
///
/// Creates a fence with ordering \p Ord and synchronization scope \p SSID
/// when \p Ord is release-or-stronger and \p Inst has an atomic store.
/// Returns nullptr when no leading fence is required.
Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord,
                                                  SyncScope::ID SSID) const {
  if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
    return Builder.CreateFence(Ord, SSID);
  return nullptr;
}

/// Default trailing-fence hook.
///
/// Creates a fence with ordering \p Ord and synchronization scope \p SSID
/// when \p Ord is acquire-or-stronger. Returns nullptr when no trailing fence
/// is required.
Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord,
                                                   SyncScope::ID SSID) const {
  if (isAcquireOrStronger(Ord))
    return Builder.CreateFence(Ord, SSID);
  return nullptr;
}
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21221,7 +21221,8 @@ Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
AtomicOrdering Ord,
SyncScope::ID SSID) const {
switch (Ord) {
case AtomicOrdering::NotAtomic:
case AtomicOrdering::Unordered:
Expand All @@ -21246,7 +21247,8 @@ Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,

Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
AtomicOrdering Ord,
SyncScope::ID SSID) const {
switch (Ord) {
case AtomicOrdering::NotAtomic:
case AtomicOrdering::Unordered:
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/ARM/ARMISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -666,10 +666,12 @@ class VectorType;
void
emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
// Target override of the leading-fence hook. SSID defaults to
// SyncScope::System, so call sites that pass only an ordering still compile.
Instruction *emitLeadingFence(
    IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord,
    SyncScope::ID SSID = SyncScope::System) const override;
// Target override of the trailing-fence hook. SSID defaults to
// SyncScope::System, so call sites that pass only an ordering still compile.
Instruction *emitTrailingFence(
    IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord,
    SyncScope::ID SSID = SyncScope::System) const override;

unsigned getMaxSupportedInterleaveFactor() const override;

Expand Down
13 changes: 8 additions & 5 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6055,23 +6055,26 @@ AtomicOrdering NVPTXTargetLowering::atomicOperationOrderAfterFenceSplit(

/// Emits the fence to place in front of \p Inst.
///
/// Instructions other than cmpxchg defer to the generic TargetLoweringBase
/// implementation. For cmpxchg, a release-or-stronger \p Ord produces a fence
/// in scope \p SSID: seq_cst stays seq_cst, any other such ordering becomes
/// release. Returns nullptr when no leading fence is needed.
Instruction *NVPTXTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord,
                                                   SyncScope::ID SSID) const {
  // Forward the scope too; otherwise the generic path would fall back to its
  // default argument and drop the caller's requested scope.
  if (!isa<AtomicCmpXchgInst>(Inst))
    return TargetLoweringBase::emitLeadingFence(Builder, Inst, Ord, SSID);

  // Specialize for cmpxchg
  // Emit a fence.sc leading fence for cmpxchg seq_cst which are not emulated
  if (isReleaseOrStronger(Ord))
    return Ord == AtomicOrdering::SequentiallyConsistent
               ? Builder.CreateFence(AtomicOrdering::SequentiallyConsistent,
                                     SSID)
               : Builder.CreateFence(AtomicOrdering::Release, SSID);

  return nullptr;
}

Instruction *NVPTXTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
AtomicOrdering Ord,
SyncScope::ID SSID) const {
// Specialize for cmpxchg
if (!isa<AtomicCmpXchgInst>(Inst))
return TargetLoweringBase::emitTrailingFence(Builder, Inst, Ord);
Expand All @@ -6084,7 +6087,7 @@ Instruction *NVPTXTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
if (isAcquireOrStronger(Ord) &&
(Ord != AtomicOrdering::SequentiallyConsistent ||
CASWidth < STI.getMinCmpXchgSizeInBits()))
return Builder.CreateFence(AtomicOrdering::Acquire);
return Builder.CreateFence(AtomicOrdering::Acquire, SSID);

return nullptr;
}
Expand Down
12 changes: 8 additions & 4 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,14 @@ class NVPTXTargetLowering : public TargetLowering {
AtomicOrdering
atomicOperationOrderAfterFenceSplit(const Instruction *I) const override;

Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *
emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord,
SyncScope::ID SSID = SyncScope::System) const override;
Instruction *
emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord,
SyncScope::ID SSID = SyncScope::System) const override;

private:
const NVPTXSubtarget &STI; // cache the subtarget here
Expand Down
67 changes: 50 additions & 17 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,27 @@ def AS_match {
}];
}

// Defines four scope-specific PatFrags for a ternary (ptr, cmp, val) atomic
// operator: NAME#_cta, NAME#_cluster, NAME#_gpu and NAME#_sys.
// Each fragment wraps the operator named NAME and matches only when the
// node's sync scope ID maps, through Scopes[], to the corresponding
// NVPTX::Scope value.
// `frag` supplies the operand shape; its dag operator is replaced with `ops`
// to build the PatFrag operand list.
multiclass nvvm_ternary_atomic_op_scoped<SDPatternOperator frag> {
defvar frag_pat = (frag node:$ptr, node:$cmp, node:$val);
// _cta: matches NVPTX::Scope::Block.
def NAME#_cta: PatFrag<!setdagop(frag_pat, ops),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
return Scopes[cast<MemSDNode>(N)->getSyncScopeID()] == NVPTX::Scope::Block;
}]>;
// _cluster: matches NVPTX::Scope::Cluster.
def NAME#_cluster : PatFrag<!setdagop(frag_pat, ops),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
return Scopes[cast<MemSDNode>(N)->getSyncScopeID()] == NVPTX::Scope::Cluster;
}]>;
// _gpu: matches NVPTX::Scope::Device.
def NAME#_gpu: PatFrag<!setdagop(frag_pat, ops),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
return Scopes[cast<MemSDNode>(N)->getSyncScopeID()] == NVPTX::Scope::Device;
}]>;
// _sys: matches NVPTX::Scope::System.
def NAME#_sys: PatFrag<!setdagop(frag_pat, ops),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
return Scopes[cast<MemSDNode>(N)->getSyncScopeID()] == NVPTX::Scope::System;
}]>;
}


// A node that will be replaced with the current PTX version.
class PTX {
SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
Expand Down Expand Up @@ -2022,40 +2043,41 @@ multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceSt

// has 3 operands
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
ValueType regT, NVPTXRegClass regclass, string SemStr,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, list<Predicate> Pred> {
ValueType regT, NVPTXRegClass regclass, string SemStr,
string ScopeStr, string SpaceStr, string TypeStr, string OpcStr,
PatFrag IntOp, Operand IMMType, list<Predicate> Pred> {
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
def reg : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, regclass:$c),
!strconcat("atom", SemStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
!strconcat("atom", SemStr, ScopeStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
[(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
Requires<Pred>;

def imm1 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, regclass:$c),
!strconcat("atom", SemStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
!strconcat("atom", SemStr, ScopeStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
[(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
Requires<Pred>;

def imm2 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, IMMType:$c),
!strconcat("atom", SemStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
!strconcat("atom", SemStr, ScopeStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
[(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
Requires<Pred>;

def imm3 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, IMMType:$c),
!strconcat("atom", SemStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
!strconcat("atom", SemStr, ScopeStr, SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
[(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
Requires<Pred>;
}
}
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SemStr, string SpaceStr,
string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SemStr, SpaceStr, TypeStr,
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SemStr, string ScopeStr,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
list<Predicate> Pred = []> {
defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SemStr, ScopeStr, SpaceStr, TypeStr,
OpcStr, IntOp, IMMType, Pred>;
defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SemStr, SpaceStr, TypeStr,
defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SemStr, ScopeStr, SpaceStr, TypeStr,
OpcStr, IntOp, IMMType, Pred>;
}

Expand Down Expand Up @@ -2469,10 +2491,12 @@ foreach size = ["i16", "i32", "i64"] in {
// ".cas", atomic_cmp_swap_i32_acquire_global, i32imm,
// [hasSM<70>, hasPTX<63>]>
multiclass INT_PTX_ATOM_CAS<string atomic_cmp_swap_pat, string type,
string order, string addrspace, list<Predicate> preds>
string order, string scope, string addrspace,
list<Predicate> preds>
: F_ATOMIC_3<!cast<ValueType>("i"#type),
!cast<NVPTXRegClass>("Int"#type#"Regs"),
order,
scope,
addrspace,
".b"#type,
".cas",
Expand All @@ -2487,26 +2511,35 @@ foreach size = ["32", "64"] in {
defvar cas_addrspace_string = !if(!eq(addrspace, "generic"), "", "."#addrspace);
foreach order = ["acquire", "release", "acq_rel", "monotonic"] in {
defvar cas_order_string = !if(!eq(order, "monotonic"), ".relaxed", "."#order);
defvar atomic_cmp_swap_pat = !cast<PatFrag>("atomic_cmp_swap_i"#size#_#order#_#addrspace);
defm atomic_cmp_swap_i#size#_#order#_#addrspace: nvvm_ternary_atomic_op_scoped<atomic_cmp_swap_pat>;

foreach scope = ["cta", "cluster", "gpu", "sys"] in {
defm INT_PTX_ATOM_CAS_#size#_#order#addrspace#scope
: INT_PTX_ATOM_CAS<"atomic_cmp_swap_i"#size#_#order#_#addrspace#_#scope, size,
cas_order_string, "."#scope, cas_addrspace_string,
[hasSM<70>, hasPTX<63>]>;
}
// Note that AtomicExpand will convert cmpxchg seq_cst to a cmpxchg monotonic with fences around it.
// Memory orders are only supported for SM70+, PTX63+, so we have two sets of instruction definitions:
// one for SM70+, and "old" ones which lower to "atom.cas", for earlier archs.
defm INT_PTX_ATOM_CAS_#size#_#order#addrspace
: INT_PTX_ATOM_CAS<"atomic_cmp_swap_i"#size#_#order#_#addrspace, size,
cas_order_string, cas_addrspace_string,
cas_order_string, "", cas_addrspace_string,
[hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_#size#_#order#_old#addrspace
: INT_PTX_ATOM_CAS<"atomic_cmp_swap_i"#size#_#order#_#addrspace, size,
"", cas_addrspace_string, []>;
"", "", cas_addrspace_string, []>;
}
}
}

// Note that 16-bit CAS support in PTX is emulated.
defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, "", ".global", ".b16", ".cas",
defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, "", "", ".global", ".b16", ".cas",
atomic_cmp_swap_i16_global, i16imm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, "", ".shared", ".b16", ".cas",
defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, "", "", ".shared", ".b16", ".cas",
atomic_cmp_swap_i16_shared, i16imm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", "", ".b16", ".cas",
defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", "", "", ".b16", ".cas",
atomic_cmp_swap_i16_generic, i16imm, [hasSM<70>, hasPTX<63>]>;

// Support for scoped atomic operations. Matches
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12430,7 +12430,8 @@ static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
AtomicOrdering Ord,
SyncScope::ID SSID) const {
if (Ord == AtomicOrdering::SequentiallyConsistent)
return callIntrinsic(Builder, Intrinsic::ppc_sync);
if (isReleaseOrStronger(Ord))
Expand All @@ -12440,7 +12441,8 @@ Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,

Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
AtomicOrdering Ord,
SyncScope::ID SSID) const {
if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
Expand Down
12 changes: 8 additions & 4 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -921,10 +921,14 @@ namespace llvm {
return true;
}

Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *
emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord,
SyncScope::ID SSID = SyncScope::System) const override;
Instruction *
emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord,
SyncScope::ID SSID = SyncScope::System) const override;

bool shouldInlineQuadwordAtomics() const;

Expand Down
Loading