diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h index 81a5453bac26c..38d6b77916ca8 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -24,168 +24,6 @@ namespace llvm { namespace mca { -/// A node of a memory dependency graph. A MemoryGroup describes a set of -/// instructions with same memory dependencies. -/// -/// By construction, instructions of a MemoryGroup don't depend on each other. -/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups. -/// A Memory group identifier is then stored as a "token" in field -/// Instruction::LSUTokenID of each dispatched instructions. That token is used -/// internally by the LSUnit to track memory dependencies. -class MemoryGroup { - unsigned NumPredecessors = 0; - unsigned NumExecutingPredecessors = 0; - unsigned NumExecutedPredecessors = 0; - - unsigned NumInstructions = 0; - unsigned NumExecuting = 0; - unsigned NumExecuted = 0; - // Successors that are in a order dependency with this group. - SmallVector<MemoryGroup *, 4> OrderSucc; - // Successors that are in a data dependency with this group. 
- SmallVector<MemoryGroup *, 4> DataSucc; - - CriticalDependency CriticalPredecessor; - InstRef CriticalMemoryInstruction; - - MemoryGroup(const MemoryGroup &) = delete; - MemoryGroup &operator=(const MemoryGroup &) = delete; - -public: - MemoryGroup() = default; - MemoryGroup(MemoryGroup &&) = default; - - size_t getNumSuccessors() const { - return OrderSucc.size() + DataSucc.size(); - } - unsigned getNumPredecessors() const { return NumPredecessors; } - unsigned getNumExecutingPredecessors() const { - return NumExecutingPredecessors; - } - unsigned getNumExecutedPredecessors() const { - return NumExecutedPredecessors; - } - unsigned getNumInstructions() const { return NumInstructions; } - unsigned getNumExecuting() const { return NumExecuting; } - unsigned getNumExecuted() const { return NumExecuted; } - - const InstRef &getCriticalMemoryInstruction() const { - return CriticalMemoryInstruction; - } - const CriticalDependency &getCriticalPredecessor() const { - return CriticalPredecessor; - } - - void addSuccessor(MemoryGroup *Group, bool IsDataDependent) { - // Do not need to add a dependency if there is no data - // dependency and all instructions from this group have been - // issued already. 
- if (!IsDataDependent && isExecuting()) - return; - - Group->NumPredecessors++; - assert(!isExecuted() && "Should have been removed!"); - if (isExecuting()) - Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent); - - if (IsDataDependent) - DataSucc.emplace_back(Group); - else - OrderSucc.emplace_back(Group); - } - - bool isWaiting() const { - return NumPredecessors > - (NumExecutingPredecessors + NumExecutedPredecessors); - } - bool isPending() const { - return NumExecutingPredecessors && - ((NumExecutedPredecessors + NumExecutingPredecessors) == - NumPredecessors); - } - bool isReady() const { return NumExecutedPredecessors == NumPredecessors; } - bool isExecuting() const { - return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted)); - } - bool isExecuted() const { return NumInstructions == NumExecuted; } - - void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) { - assert(!isReady() && "Unexpected group-start event!"); - NumExecutingPredecessors++; - - if (!ShouldUpdateCriticalDep) - return; - - unsigned Cycles = IR.getInstruction()->getCyclesLeft(); - if (CriticalPredecessor.Cycles < Cycles) { - CriticalPredecessor.IID = IR.getSourceIndex(); - CriticalPredecessor.Cycles = Cycles; - } - } - - void onGroupExecuted() { - assert(!isReady() && "Inconsistent state found!"); - NumExecutingPredecessors--; - NumExecutedPredecessors++; - } - - void onInstructionIssued(const InstRef &IR) { - assert(!isExecuting() && "Invalid internal state!"); - ++NumExecuting; - - // update the CriticalMemDep. - const Instruction &IS = *IR.getInstruction(); - if ((bool)CriticalMemoryInstruction) { - const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction(); - if (OtherIS.getCyclesLeft() < IS.getCyclesLeft()) - CriticalMemoryInstruction = IR; - } else { - CriticalMemoryInstruction = IR; - } - - if (!isExecuting()) - return; - - // Notify successors that this group started execution. 
- for (MemoryGroup *MG : OrderSucc) { - MG->onGroupIssued(CriticalMemoryInstruction, false); - // Release the order dependency with this group. - MG->onGroupExecuted(); - } - - for (MemoryGroup *MG : DataSucc) - MG->onGroupIssued(CriticalMemoryInstruction, true); - } - - void onInstructionExecuted(const InstRef &IR) { - assert(isReady() && !isExecuted() && "Invalid internal state!"); - --NumExecuting; - ++NumExecuted; - - if (CriticalMemoryInstruction && - CriticalMemoryInstruction.getSourceIndex() == IR.getSourceIndex()) { - CriticalMemoryInstruction.invalidate(); - } - - if (!isExecuted()) - return; - - // Notify data dependent successors that this group has finished execution. - for (MemoryGroup *MG : DataSucc) - MG->onGroupExecuted(); - } - - void addInstruction() { - assert(!getNumSuccessors() && "Cannot add instructions to this group!"); - ++NumInstructions; - } - - void cycleEvent() { - if (isWaiting() && CriticalPredecessor.Cycles) - CriticalPredecessor.Cycles--; - } -}; - /// Abstract base interface for LS (load/store) units in llvm-mca. class LSUnitBase : public HardwareUnit { /// Load queue size. @@ -214,10 +52,6 @@ class LSUnitBase : public HardwareUnit { /// alias with stores. const bool NoAlias; - /// Used to map group identifiers to MemoryGroups. - DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups; - unsigned NextGroupID; - public: LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize, unsigned StoreQueueSize, bool AssumeNoAlias); @@ -265,72 +99,35 @@ class LSUnitBase : public HardwareUnit { bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; } bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; } - bool isValidGroupID(unsigned Index) const { - return Index && Groups.contains(Index); - } - /// Check if a peviously dispatched instruction IR is now ready for execution. 
- bool isReady(const InstRef &IR) const { - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); - const MemoryGroup &Group = getGroup(GroupID); - return Group.isReady(); - } + virtual bool isReady(const InstRef &IR) const = 0; /// Check if instruction IR only depends on memory instructions that are /// currently executing. - bool isPending(const InstRef &IR) const { - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); - const MemoryGroup &Group = getGroup(GroupID); - return Group.isPending(); - } + virtual bool isPending(const InstRef &IR) const = 0; /// Check if instruction IR is still waiting on memory operations, and the /// wait time is still unknown. - bool isWaiting(const InstRef &IR) const { - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); - const MemoryGroup &Group = getGroup(GroupID); - return Group.isWaiting(); - } + virtual bool isWaiting(const InstRef &IR) const = 0; - bool hasDependentUsers(const InstRef &IR) const { - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); - const MemoryGroup &Group = getGroup(GroupID); - return !Group.isExecuted() && Group.getNumSuccessors(); - } + virtual bool hasDependentUsers(const InstRef &IR) const = 0; - const MemoryGroup &getGroup(unsigned Index) const { - assert(isValidGroupID(Index) && "Group doesn't exist!"); - return *Groups.find(Index)->second; - } + virtual const CriticalDependency getCriticalPredecessor(unsigned GroupId) = 0; - MemoryGroup &getGroup(unsigned Index) { - assert(isValidGroupID(Index) && "Group doesn't exist!"); - return *Groups.find(Index)->second; - } - - unsigned createMemoryGroup() { - Groups.insert( - std::make_pair(NextGroupID, std::make_unique<MemoryGroup>())); - return NextGroupID++; - } - - virtual void onInstructionExecuted(const InstRef &IR); + virtual void onInstructionExecuted(const InstRef &IR) = 0; // Loads are tracked by the LDQ (load queue) from dispatch until completion. // Stores are tracked by the STQ (store queue) from dispatch until commitment. 
// By default we conservatively assume that the LDQ receives a load at // dispatch. Loads leave the LDQ at retirement stage. - virtual void onInstructionRetired(const InstRef &IR); + virtual void onInstructionRetired(const InstRef &IR) = 0; - virtual void onInstructionIssued(const InstRef &IR) { - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); - Groups[GroupID]->onInstructionIssued(IR); - } + virtual void onInstructionIssued(const InstRef &IR) = 0; - virtual void cycleEvent(); + virtual void cycleEvent() = 0; #ifndef NDEBUG - void dump() const; + virtual void dump() const = 0; #endif }; @@ -396,6 +193,7 @@ class LSUnitBase : public HardwareUnit { /// the load/store queue(s). That also means, all the older loads/stores have /// already been executed. class LSUnit : public LSUnitBase { + // This class doesn't know about the latency of a load instruction. So, it // conservatively/pessimistically assumes that the latency of a load opcode // matches the instruction latency. @@ -434,6 +232,175 @@ class LSUnit : public LSUnitBase { // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is // conservatively treated as a load barrier. It forces older loads to execute // before newer loads are issued. + +protected: + /// A node of a memory dependency graph. A MemoryGroup describes a set of + /// instructions with same memory dependencies. + /// + /// By construction, instructions of a MemoryGroup don't depend on each other. + /// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups. + /// A Memory group identifier is then stored as a "token" in field + /// Instruction::LSUTokenID of each dispatched instructions. That token is + /// used internally by the LSUnit to track memory dependencies. 
+ class MemoryGroup { + unsigned NumPredecessors = 0; + unsigned NumExecutingPredecessors = 0; + unsigned NumExecutedPredecessors = 0; + + unsigned NumInstructions = 0; + unsigned NumExecuting = 0; + unsigned NumExecuted = 0; + // Successors that are in a order dependency with this group. + SmallVector<MemoryGroup *, 4> OrderSucc; + // Successors that are in a data dependency with this group. + SmallVector<MemoryGroup *, 4> DataSucc; + + CriticalDependency CriticalPredecessor; + InstRef CriticalMemoryInstruction; + + MemoryGroup(const MemoryGroup &) = delete; + MemoryGroup &operator=(const MemoryGroup &) = delete; + + public: + MemoryGroup() = default; + MemoryGroup(MemoryGroup &&) = default; + + size_t getNumSuccessors() const { + return OrderSucc.size() + DataSucc.size(); + } + unsigned getNumPredecessors() const { return NumPredecessors; } + unsigned getNumExecutingPredecessors() const { + return NumExecutingPredecessors; + } + unsigned getNumExecutedPredecessors() const { + return NumExecutedPredecessors; + } + unsigned getNumInstructions() const { return NumInstructions; } + unsigned getNumExecuting() const { return NumExecuting; } + unsigned getNumExecuted() const { return NumExecuted; } + + const InstRef &getCriticalMemoryInstruction() const { + return CriticalMemoryInstruction; + } + const CriticalDependency &getCriticalPredecessor() const { + return CriticalPredecessor; + } + + void addSuccessor(MemoryGroup *Group, bool IsDataDependent) { + // Do not need to add a dependency if there is no data + // dependency and all instructions from this group have been + // issued already. 
+ if (!IsDataDependent && isExecuting()) + return; + + Group->NumPredecessors++; + assert(!isExecuted() && "Should have been removed!"); + if (isExecuting()) + Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent); + + if (IsDataDependent) + DataSucc.emplace_back(Group); + else + OrderSucc.emplace_back(Group); + } + + bool isWaiting() const { + return NumPredecessors > + (NumExecutingPredecessors + NumExecutedPredecessors); + } + bool isPending() const { + return NumExecutingPredecessors && + ((NumExecutedPredecessors + NumExecutingPredecessors) == + NumPredecessors); + } + bool isReady() const { return NumExecutedPredecessors == NumPredecessors; } + bool isExecuting() const { + return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted)); + } + bool isExecuted() const { return NumInstructions == NumExecuted; } + + void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) { + assert(!isReady() && "Unexpected group-start event!"); + NumExecutingPredecessors++; + + if (!ShouldUpdateCriticalDep) + return; + + unsigned Cycles = IR.getInstruction()->getCyclesLeft(); + if (CriticalPredecessor.Cycles < Cycles) { + CriticalPredecessor.IID = IR.getSourceIndex(); + CriticalPredecessor.Cycles = Cycles; + } + } + + void onGroupExecuted() { + assert(!isReady() && "Inconsistent state found!"); + NumExecutingPredecessors--; + NumExecutedPredecessors++; + } + + void onInstructionIssued(const InstRef &IR) { + assert(!isExecuting() && "Invalid internal state!"); + ++NumExecuting; + + // update the CriticalMemDep. + const Instruction &IS = *IR.getInstruction(); + if ((bool)CriticalMemoryInstruction) { + const Instruction &OtherIS = + *CriticalMemoryInstruction.getInstruction(); + if (OtherIS.getCyclesLeft() < IS.getCyclesLeft()) + CriticalMemoryInstruction = IR; + } else { + CriticalMemoryInstruction = IR; + } + + if (!isExecuting()) + return; + + // Notify successors that this group started execution. 
+ for (MemoryGroup *MG : OrderSucc) { + MG->onGroupIssued(CriticalMemoryInstruction, false); + // Release the order dependency with this group. + MG->onGroupExecuted(); + } + + for (MemoryGroup *MG : DataSucc) + MG->onGroupIssued(CriticalMemoryInstruction, true); + } + + void onInstructionExecuted(const InstRef &IR) { + assert(isReady() && !isExecuted() && "Invalid internal state!"); + --NumExecuting; + ++NumExecuted; + + if (CriticalMemoryInstruction && + CriticalMemoryInstruction.getSourceIndex() == IR.getSourceIndex()) { + CriticalMemoryInstruction.invalidate(); + } + + if (!isExecuted()) + return; + + // Notify data dependent successors that this group has finished + // execution. + for (MemoryGroup *MG : DataSucc) + MG->onGroupExecuted(); + } + + void addInstruction() { + assert(!getNumSuccessors() && "Cannot add instructions to this group!"); + ++NumInstructions; + } + + void cycleEvent() { + if (isWaiting() && CriticalPredecessor.Cycles) + CriticalPredecessor.Cycles--; + } + }; + /// Used to map group identifiers to MemoryGroups. + DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups; + unsigned NextGroupID = 1; + unsigned CurrentLoadGroupID; unsigned CurrentLoadBarrierGroupID; unsigned CurrentStoreGroupID; @@ -453,6 +420,35 @@ class LSUnit : public LSUnitBase { /// accomodate instruction IR. 
Status isAvailable(const InstRef &IR) const override; + bool isReady(const InstRef &IR) const override { + unsigned GroupID = IR.getInstruction()->getLSUTokenID(); + const MemoryGroup &Group = getGroup(GroupID); + return Group.isReady(); + } + + bool isPending(const InstRef &IR) const override { + unsigned GroupID = IR.getInstruction()->getLSUTokenID(); + const MemoryGroup &Group = getGroup(GroupID); + return Group.isPending(); + } + + bool isWaiting(const InstRef &IR) const override { + unsigned GroupID = IR.getInstruction()->getLSUTokenID(); + const MemoryGroup &Group = getGroup(GroupID); + return Group.isWaiting(); + } + + bool hasDependentUsers(const InstRef &IR) const override { + unsigned GroupID = IR.getInstruction()->getLSUTokenID(); + const MemoryGroup &Group = getGroup(GroupID); + return !Group.isExecuted() && Group.getNumSuccessors(); + } + + const CriticalDependency getCriticalPredecessor(unsigned GroupId) override { + const MemoryGroup &Group = getGroup(GroupId); + return Group.getCriticalPredecessor(); + } + /// Allocates LS resources for instruction IR. /// /// This method assumes that a previous call to `isAvailable(IR)` succeeded @@ -468,7 +464,40 @@ class LSUnit : public LSUnitBase { /// 6. A store has to wait until an older store barrier is fully executed. 
unsigned dispatch(const InstRef &IR) override; - void onInstructionExecuted(const InstRef &IR) override; + virtual void onInstructionIssued(const InstRef &IR) override { + unsigned GroupID = IR.getInstruction()->getLSUTokenID(); + Groups[GroupID]->onInstructionIssued(IR); + } + + virtual void onInstructionRetired(const InstRef &IR) override; + + virtual void onInstructionExecuted(const InstRef &IR) override; + + virtual void cycleEvent() override; + +#ifndef NDEBUG + virtual void dump() const override; +#endif + +private: + bool isValidGroupID(unsigned Index) const { + return Index && Groups.contains(Index); + } + + const MemoryGroup &getGroup(unsigned Index) const { + assert(isValidGroupID(Index) && "Group doesn't exist!"); + return *Groups.find(Index)->second; + } + + MemoryGroup &getGroup(unsigned Index) { + assert(isValidGroupID(Index) && "Group doesn't exist!"); + return *Groups.find(Index)->second; + } + + unsigned createMemoryGroup() { + Groups.insert(std::make_pair(NextGroupID, std::make_unique<MemoryGroup>())); + return NextGroupID++; + } }; } // namespace mca diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp index bdc8b3d0e390e..bf0b432524881 100644 --- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp +++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp @@ -24,7 +24,7 @@ namespace mca { LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias) : LQSize(LQ), SQSize(SQ), UsedLQEntries(0), UsedSQEntries(0), - NoAlias(AssumeNoAlias), NextGroupID(1) { + NoAlias(AssumeNoAlias) { if (SM.hasExtraProcessorInfo()) { const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); if (!LQSize && EPI.LoadQueueID) { @@ -41,13 +41,13 @@ LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ, LSUnitBase::~LSUnitBase() = default; -void LSUnitBase::cycleEvent() { +void LSUnit::cycleEvent() { for (const std::pair<unsigned, std::unique_ptr<MemoryGroup>> &G : Groups) G.second->cycleEvent(); } #ifndef NDEBUG -void LSUnitBase::dump() const { +void 
LSUnit::dump() const { dbgs() << "[LSUnit] LQ_Size = " << getLoadQueueSize() << '\n'; dbgs() << "[LSUnit] SQ_Size = " << getStoreQueueSize() << '\n'; dbgs() << "[LSUnit] NextLQSlotIdx = " << getUsedLQEntries() << '\n'; @@ -96,8 +96,8 @@ unsigned LSUnit::dispatch(const InstRef &IR) { if (CurrentStoreBarrierGroupID) { MemoryGroup &StoreGroup = getGroup(CurrentStoreBarrierGroupID); LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" - << CurrentStoreBarrierGroupID - << ") --> (" << NewGID << ")\n"); + << CurrentStoreBarrierGroupID << ") --> (" << NewGID + << ")\n"); StoreGroup.addSuccessor(&NewGroup, true); } @@ -110,7 +110,6 @@ unsigned LSUnit::dispatch(const InstRef &IR) { StoreGroup.addSuccessor(&NewGroup, !assumeNoAlias()); } - CurrentStoreGroupID = NewGID; if (IsStoreBarrier) CurrentStoreBarrierGroupID = NewGID; @@ -165,8 +164,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) { if (IsLoadBarrier) { if (ImmediateLoadDominator) { MemoryGroup &LoadGroup = getGroup(ImmediateLoadDominator); - LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" - << ImmediateLoadDominator + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator << ") --> (" << NewGID << ")\n"); LoadGroup.addSuccessor(&NewGroup, true); } @@ -175,8 +173,8 @@ unsigned LSUnit::dispatch(const InstRef &IR) { if (CurrentLoadBarrierGroupID) { MemoryGroup &LoadGroup = getGroup(CurrentLoadBarrierGroupID); LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" - << CurrentLoadBarrierGroupID - << ") --> (" << NewGID << ")\n"); + << CurrentLoadBarrierGroupID << ") --> (" << NewGID + << ")\n"); LoadGroup.addSuccessor(&NewGroup, true); } } @@ -202,16 +200,7 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { return LSUnit::LSU_AVAILABLE; } -void LSUnitBase::onInstructionExecuted(const InstRef &IR) { - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); - auto It = Groups.find(GroupID); - assert(It != Groups.end() && "Instruction not dispatched to the LS unit"); - It->second->onInstructionExecuted(IR); - 
if (It->second->isExecuted()) - Groups.erase(It); -} - -void LSUnitBase::onInstructionRetired(const InstRef &IR) { +void LSUnit::onInstructionRetired(const InstRef &IR) { const Instruction &IS = *IR.getInstruction(); bool IsALoad = IS.getMayLoad(); bool IsAStore = IS.getMayStore(); @@ -235,8 +224,13 @@ void LSUnit::onInstructionExecuted(const InstRef &IR) { if (!IS.isMemOp()) return; - LSUnitBase::onInstructionExecuted(IR); unsigned GroupID = IS.getLSUTokenID(); + auto It = Groups.find(GroupID); + assert(It != Groups.end() && "Instruction not dispatched to the LS unit"); + It->second->onInstructionExecuted(IR); + if (It->second->isExecuted()) + Groups.erase(It); + if (!isValidGroupID(GroupID)) { if (GroupID == CurrentLoadGroupID) CurrentLoadGroupID = 0; diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp index a9bbf69799198..1a3e823eb52f7 100644 --- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp +++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp @@ -85,8 +85,9 @@ void Scheduler::issueInstructionImpl( if (IS->isMemOp()) { LSU.onInstructionIssued(IR); - const MemoryGroup &Group = LSU.getGroup(IS->getLSUTokenID()); - IS->setCriticalMemDep(Group.getCriticalPredecessor()); + const CriticalDependency &MemDep = + LSU.getCriticalPredecessor(IS->getLSUTokenID()); + IS->setCriticalMemDep(MemDep); } if (IS->isExecuting())