diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index 1daae5d064aca..8598a5ca84ed8 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -383,6 +383,20 @@ that do not start with `LLVM-MCA-` are ignored by :program:`llvm-mca`. An instruction (a MCInst) is added to an InstrumentRegion R only if its location is in range [R.RangeStart, R.RangeEnd]. +There is one instrument that can be used on all targets to explicitly +set instruction latencies. It can be used, for example, to model the +cache misses that impact load latencies. The syntax is: + +.. code-block:: none + + # LLVM-MCA-LATENCY 100 + mov (%edi), %eax + # LLVM-MCA-LATENCY + +This sets the latency of the mov instruction to 100. LLVM-MCA-LATENCY without +an argument ends the explicit-latency region; after it, the default target +latencies are used again. + On RISCV targets, vector instructions have different behaviour depending on the LMUL. Code can be instrumented with a comment that takes the following form: diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h index 0bf86ce688f44..0ce3993be95ba 100644 --- a/llvm/include/llvm/MCA/CustomBehaviour.h +++ b/llvm/include/llvm/MCA/CustomBehaviour.h @@ -134,6 +134,25 @@ class Instrument { StringRef getData() const { return Data; } }; +class LatencyInstrument : public Instrument { + std::optional Latency; + +public: + static const StringRef DESC_NAME; + LatencyInstrument(StringRef Data) : Instrument(DESC_NAME, Data) { + // Skip spaces and tabs. + Data = Data.trim(); + if (Data.empty()) // Empty description. Bail out. 
+ return; + unsigned L = 0; + if (!Data.getAsInteger(10, L)) + Latency = L; + } + + bool hasValue() const { return bool(Latency); } + unsigned getLatency() const { return *Latency; } +}; + using UniqueInstrument = std::unique_ptr; /// This class allows targets to optionally customize the logic that resolves @@ -143,19 +162,21 @@ class LLVM_ABI InstrumentManager { protected: const MCSubtargetInfo &STI; const MCInstrInfo &MCII; + bool EnableInstruments; public: - InstrumentManager(const MCSubtargetInfo &STI, const MCInstrInfo &MCII) - : STI(STI), MCII(MCII) {} + InstrumentManager(const MCSubtargetInfo &STI, const MCInstrInfo &MCII, + bool EnableInstruments = true) + : STI(STI), MCII(MCII), EnableInstruments(EnableInstruments) {}; virtual ~InstrumentManager() = default; /// Returns true if llvm-mca should ignore instruments. - virtual bool shouldIgnoreInstruments() const { return true; } + virtual bool shouldIgnoreInstruments() const { return !EnableInstruments; } // Returns true if this supports processing Instrument with // Instrument.Desc equal to Type - virtual bool supportsInstrumentType(StringRef Type) const { return false; } + virtual bool supportsInstrumentType(StringRef Type) const; /// Allocate an Instrument, and return a unique pointer to it. This function /// may be useful to create instruments coming from comments in the assembly. @@ -175,6 +196,13 @@ class LLVM_ABI InstrumentManager { /// it returns the SchedClassID that belongs to MCI. 
virtual unsigned getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI, const SmallVector &IVec) const; + + // Return true if instruments can modify instruction description + virtual bool canCustomize(const ArrayRef IVec) const; + + // Customize instruction description + virtual void customize(const ArrayRef IVec, + llvm::mca::InstrDesc &Desc) const; }; } // namespace mca diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h index e0949d975fa99..a5ce632b03634 100644 --- a/llvm/include/llvm/MCA/InstrBuilder.h +++ b/llvm/include/llvm/MCA/InstrBuilder.h @@ -78,6 +78,10 @@ class InstrBuilder { DenseMap, std::unique_ptr> VariantDescriptors; + // These descriptors are customized for particular instructions and cannot + // be reused + SmallVector> CustomDescriptors; + bool FirstCallInst; bool FirstReturnInst; unsigned CallLatency; diff --git a/llvm/lib/MCA/CustomBehaviour.cpp b/llvm/lib/MCA/CustomBehaviour.cpp index 1aa266e0a1e43..51b85bf819840 100644 --- a/llvm/lib/MCA/CustomBehaviour.cpp +++ b/llvm/lib/MCA/CustomBehaviour.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MCA/CustomBehaviour.h" +#include "llvm/MCA/Instruction.h" namespace llvm { namespace mca { @@ -42,8 +43,44 @@ CustomBehaviour::getEndViews(llvm::MCInstPrinter &IP, return std::vector>(); } -UniqueInstrument InstrumentManager::createInstrument(llvm::StringRef Desc, - llvm::StringRef Data) { +const llvm::StringRef LatencyInstrument::DESC_NAME = "LATENCY"; + +bool InstrumentManager::supportsInstrumentType(StringRef Type) const { + return EnableInstruments && Type == LatencyInstrument::DESC_NAME; +} + +bool InstrumentManager::canCustomize(const ArrayRef IVec) const { + for (const auto I : IVec) { + if (I->getDesc() == LatencyInstrument::DESC_NAME) { + auto LatInst = static_cast(I); + return LatInst->hasValue(); + } + } + return false; +} + +void InstrumentManager::customize(const ArrayRef IVec, + 
+ InstrDesc &ID) const { + for (const auto I : IVec) { + if (I->getDesc() == LatencyInstrument::DESC_NAME) { + auto LatInst = static_cast(I); + if (LatInst->hasValue()) { + unsigned Latency = LatInst->getLatency(); + // TODO: Allow customizing only a subset of ID.Writes. + for (auto &W : ID.Writes) + W.Latency = Latency; + ID.MaxLatency = Latency; + } + } + } +} + +UniqueInstrument InstrumentManager::createInstrument(StringRef Desc, + StringRef Data) { + if (EnableInstruments) { + if (Desc == LatencyInstrument::DESC_NAME) + return std::make_unique(Data); + } return std::make_unique(Desc, Data); } diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index cad25a6ddd3f5..ffd6a8d5c14a8 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -632,6 +632,12 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI, return std::move(Err); // Now add the new descriptor. + + if (IM.canCustomize(IVec)) { + IM.customize(IVec, *ID); + return *CustomDescriptors.emplace_back(std::move(ID)); + } + bool IsVariadic = MCDesc.isVariadic(); if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) { auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID); @@ -676,7 +682,9 @@ STATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc"); Expected> InstrBuilder::createInstruction(const MCInst &MCI, const SmallVector &IVec) { - Expected DescOrErr = getOrCreateInstrDesc(MCI, IVec); + Expected DescOrErr = IM.canCustomize(IVec) + ? 
createInstrDescImpl(MCI, IVec) + : getOrCreateInstrDesc(MCI, IVec); if (!DescOrErr) return DescOrErr.takeError(); const InstrDesc &D = *DescOrErr; diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-13.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-13.s new file mode 100644 index 0000000000000..aa00ac4400820 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-13.s @@ -0,0 +1,12 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=10 %s 2>&1 | FileCheck %s + +# LLVM-MCA-LATENCY 100 +add (%eax), %eax +# LLVM-MCA-LATENCY +mov %eax, (%ebx) + +# CHECK: Iterations: 10 +# CHECK-NEXT: Instructions: 20 +# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total uOps: 20 diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-14.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-14.s new file mode 100644 index 0000000000000..f460f1df8a703 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-14.s @@ -0,0 +1,11 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=10 %s 2>&1 | FileCheck %s + +# LLVM-MCA-LATENCY 100 +add (%eax), %eax +mov %eax, (%ebx) + +# CHECK: Iterations: 10 +# CHECK-NEXT: Instructions: 20 +# CHECK-NEXT: Total Cycles: 1103 +# CHECK-NEXT: Total uOps: 20 diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index a04b6ee9d90cf..a4194da4a7b63 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -510,11 +510,16 @@ int main(int argc, char **argv) { if (!DisableInstrumentManager) { IM = std::unique_ptr( TheTarget->createInstrumentManager(*STI, *MCII)); - } - if (!IM) { - // If the target doesn't have its own IM implemented (or the -disable-cb - // flag is set) then we use the base class (which does nothing). 
- IM = std::make_unique(*STI, *MCII); + if (!IM) { + // If the target doesn't implement its own IM, we use the base class + // with instruments enabled. + IM = std::make_unique(*STI, *MCII); + } + } else { + // If the -disable-im flag is set then we use the default base class + // implementation and disable the instruments. + IM = std::make_unique(*STI, *MCII, + /*EnableInstruments=*/false); } // Parse the input and create InstrumentRegion that llvm-mca diff --git a/llvm/unittests/tools/llvm-mca/MCATestBase.cpp b/llvm/unittests/tools/llvm-mca/MCATestBase.cpp index 2b8415b13cf45..c47ac4828fc40 100644 --- a/llvm/unittests/tools/llvm-mca/MCATestBase.cpp +++ b/llvm/unittests/tools/llvm-mca/MCATestBase.cpp @@ -57,16 +57,24 @@ void MCATestBase::SetUp() { ASSERT_TRUE(IP); } -Error MCATestBase::runBaselineMCA(json::Object &Result, ArrayRef Insts, - ArrayRef Views, - const mca::PipelineOptions *PO) { +Error MCATestBase::runBaselineMCA( + json::Object &Result, ArrayRef Insts, ArrayRef Views, + const mca::PipelineOptions *PO, + ArrayRef> Descs) { mca::Context MCA(*MRI, *STI); - // Default InstrumentManager - auto IM = std::make_unique(*STI, *MCII); + // Enable instruments when descriptions are provided + auto IM = + std::make_unique(*STI, *MCII, !Descs.empty()); mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100); - const SmallVector Instruments; + SmallVector Instruments; + SmallVector InstrumentsOwner; + for (const auto &Desc : Descs) { + auto I = IM->createInstrument(Desc.first, Desc.second); + Instruments.push_back(I.get()); + InstrumentsOwner.push_back(std::move(I)); + } SmallVector> LoweredInsts; for (const auto &MCI : Insts) { Expected> Inst = diff --git a/llvm/unittests/tools/llvm-mca/MCATestBase.h b/llvm/unittests/tools/llvm-mca/MCATestBase.h index 66e20a45c96ce..6672305cdb878 100644 --- a/llvm/unittests/tools/llvm-mca/MCATestBase.h +++ b/llvm/unittests/tools/llvm-mca/MCATestBase.h @@ -24,6 +24,7 @@ #include "llvm/MC/MCTargetOptions.h" 
#include "llvm/MC/TargetRegistry.h" #include "llvm/MCA/Context.h" +#include "llvm/MCA/InstrBuilder.h" #include "llvm/TargetParser/SubtargetFeature.h" #include "llvm/TargetParser/Triple.h" @@ -73,9 +74,11 @@ class MCATestBase : public ::testing::Test { /// Utility function to run MCA with (nearly) the same configuration as the /// `llvm-mca` tool to verify result correctness. /// This function only displays on SummaryView by default. - virtual Error runBaselineMCA(json::Object &Result, ArrayRef Insts, - ArrayRef Views = {}, - const mca::PipelineOptions *PO = nullptr); + virtual Error + runBaselineMCA(json::Object &Result, ArrayRef Insts, + ArrayRef Views = {}, + const mca::PipelineOptions *PO = nullptr, + ArrayRef> Descs = {}); }; } // end namespace mca diff --git a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp index 1a14c687295ca..17809e7beda95 100644 --- a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp +++ b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp @@ -234,3 +234,27 @@ TEST_F(X86TestBase, TestVariantInstructionsSameAddress) { Expected Cycles = P->run(); ASSERT_TRUE(static_cast(Cycles)); } + +// Test customization of instruction latency with instruments +TEST_F(X86TestBase, TestInstructionCustomization) { + const unsigned ExplicitLatency = 100; + SmallVector MCIs; + MCInst InstructionToAdd = MCInstBuilder(X86::XOR64rr) + .addReg(X86::RAX) + .addReg(X86::RBX) + .addReg(X86::RCX); + MCIs.push_back(InstructionToAdd); + SmallVector> InstrDescs; + auto LatStr = std::to_string(ExplicitLatency); + InstrDescs.push_back(std::make_pair(StringRef("LATENCY"), StringRef(LatStr))); + + // Run the baseline. 
+ json::Object BaselineResult; + auto E = runBaselineMCA(BaselineResult, MCIs, {}, nullptr, InstrDescs); + ASSERT_FALSE(bool(E)) << "Failed to run baseline"; + auto *BaselineObj = BaselineResult.getObject("SummaryView"); + auto V = BaselineObj->getInteger("TotalCycles"); + ASSERT_TRUE(V); + // Additional 3 cycles for Dispatch, Executed and Retired states + ASSERT_EQ(unsigned(*V), ExplicitLatency + 3) << "Total cycles do not match"; +}