From 21c22d336df727dd8fee9603c1b1acd61862e4b9 Mon Sep 17 00:00:00 2001 From: Martien de Jong Date: Mon, 4 Aug 2025 17:36:56 +0200 Subject: [PATCH 1/4] [AIE][CodeGenFormat] Compute ConflictBits using all given formats Separate SlotBits from ConflictBits --- llvm/include/llvm/Target/CodeGenFormat.td | 4 + llvm/lib/Target/AIE/AIE2PSlotInclude.td | 1 + llvm/lib/Target/AIE/AIE2Slots.td | 1 + .../Target/AIE/MCTargetDesc/AIEMCFormats.h | 15 ++- .../SlotLogicTests/slot_conflicts.td | 123 ++++++++++++++++++ llvm/unittests/Target/AIE/BundleTest.cpp | 10 +- llvm/utils/TableGen/CodeGenFormat.cpp | 47 ++++++- llvm/utils/TableGen/CodeGenFormat.h | 37 +++++- 8 files changed, 222 insertions(+), 16 deletions(-) create mode 100644 llvm/test/TableGen/CodeGenFormat/SlotLogicTests/slot_conflicts.td diff --git a/llvm/include/llvm/Target/CodeGenFormat.td b/llvm/include/llvm/Target/CodeGenFormat.td index fe1a7297462e..38ae704298fa 100644 --- a/llvm/include/llvm/Target/CodeGenFormat.td +++ b/llvm/include/llvm/Target/CodeGenFormat.td @@ -45,6 +45,10 @@ class InstSlot : Operand { let FieldToFind = "nop"; + let Artificial = true; } } diff --git a/llvm/lib/Target/AIE/AIE2Slots.td b/llvm/lib/Target/AIE/AIE2Slots.td index ebfb4654d149..67b4ca533249 100644 --- a/llvm/lib/Target/AIE/AIE2Slots.td +++ b/llvm/lib/Target/AIE/AIE2Slots.td @@ -59,6 +59,7 @@ let Namespace = "AIE2" in // 64 bit one. def nop_slot : InstSlot<"Nop", 1> { let FieldToFind = "nop"; + let Artificial = true; } } diff --git a/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.h b/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.h index 6ee71c813b6b..c144351744a6 100644 --- a/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.h +++ b/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. 
or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // Utility classes to interface the generated Formats from CodeGenFormat with @@ -93,18 +93,23 @@ class MCSlotInfo { const char *SlotName; /// Size of the slot (in bits) const unsigned Size; - /// Bitset representing the occupancy of the slot + /// Bitset representing the occupancy of the slots const SlotBits SlotOccupancy; - /// Opcode of the NOP inst. attached to the slot + /// The closure of SlotOccupancy with the computed exclusions, + /// e.g. XM implies X and M + const SlotBits ConflictBits; + /// Opcode of the NOP instruction attached to the slot const unsigned NopOpc; public: constexpr MCSlotInfo(const char *SlotName, unsigned Size, SlotBits Bits, - unsigned NopOpc) - : SlotName(SlotName), Size(Size), SlotOccupancy(Bits), NopOpc(NopOpc) {} + SlotBits ConflictBits, unsigned NopOpc) + : SlotName(SlotName), Size(Size), SlotOccupancy(Bits), + ConflictBits(ConflictBits), NopOpc(NopOpc) {} const char *getName() const { return SlotName; } SlotBits getSlotSet() const { return SlotOccupancy; } + SlotBits getConflictSet() const { return ConflictBits; } unsigned getNOPOpcode() const { return NopOpc; } unsigned getSize() const { return Size; } }; diff --git a/llvm/test/TableGen/CodeGenFormat/SlotLogicTests/slot_conflicts.td b/llvm/test/TableGen/CodeGenFormat/SlotLogicTests/slot_conflicts.td new file mode 100644 index 000000000000..f2131c8fafb4 --- /dev/null +++ b/llvm/test/TableGen/CodeGenFormat/SlotLogicTests/slot_conflicts.td @@ -0,0 +1,123 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright 2025 Advanced Micro Devices, Inc. 
or its affiliates +// + +// RUN: llvm-tblgen --gen-instr-format -I %p -I %p/../ -I %p/../../../../include %s 2>&1 | FileCheck %s + +// This is a minimal check for the computations of the conflict bits. +// We have the slots A, X, M and XM, which interfere like the +// corresponding slots on typical AIE architectures. +// We compute conflicts from the totality of formats, and we should find +// conflicts +// X -> {X, XM}, +// M -> {M, XM}, +// XM -> {X, M, XM} +// A -> {A} +// and nothing more. + +// CHECK-LABEL: "X", +// CHECK-NEXT: 8, +// CHECK-NEXT: 1, +// CHECK-NEXT: 5, +// CHECK-LABEL: "A", +// CHECK-NEXT: 10, +// CHECK-NEXT: 2, +// CHECK-NEXT: 2, +// CHECK-LABEL: "XM", +// CHECK-NEXT: 18, +// CHECK-NEXT: 4, +// CHECK-NEXT: 13, +// CHECK-LABEL: "M", +// CHECK-NEXT: 10, +// CHECK-NEXT: 8, +// CHECK-NEXT: 12, +// CHECK-LABEL: "nop", +// CHECK-NEXT: 14, +// CHECK-NEXT: 16, +// CHECK-NEXT: 31, + +include "llvm/Target/Target.td" +include "llvm/Target/CodeGenFormat.td" + +let Namespace = "TEST" in { +def nop_slot : InstSlot<"nop", 14> { let Artificial = true; } +def lda_slot : InstSlot<"A", 10>; +def alu_slot : InstSlot<"X", 8>; +def mov_slot : InstSlot<"M", 10>; +def lng_slot : InstSlot<"XM", 18>; +} + + +def TESTInstrInfo : InstrInfo; + +def TEST : Target { + let InstructionSet = TESTInstrInfo; +} + +def TESTCodeGenFormat : CodeGenFormat { + let FormatClassEmitted = ""; +} + +let Namespace = "TEST" in +def default_slot : InstSlot<"default", 0, true>; + +class TESTInst pattern> + : Instruction, InstFormat +{ + let Namespace = "TEST"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = opc # "\t" # arg; + let Pattern = pattern; + + let Slot = default_slot; +} + +class TESTSubInst + : TESTInst +{ + let isComposite = false; +} + +class TESTFormat : TESTInst { + let isComposite = true; +} + + +class TEST_I32 : TESTFormat { + field bits<32> Inst; + field bits<32> instr32; + let Inst = {instr32}; +} + +def I32_A_XM : TEST_I32<(outs), (ins 
lda_slot:$lda, lng_slot:$lng)> { + bits<10> lda; + bits<18> lng; + let instr32 = {lda, lng, 0b1001}; +} + +def I32_A_X : TEST_I32<(outs), (ins lda_slot:$lda, alu_slot:$alu)> { + bits<10> lda; + bits<8> alu; + let instr32 = {lda, alu, 0b0000000000, 0b1001}; +} + +def I32_X_M : TEST_I32<(outs), (ins alu_slot:$alu, mov_slot:$mov)> { + bits<8> alu; + bits<10> mov; + let instr32 = {0b0000000000, alu, mov, 0b0110}; +} + +def I32_A_X_M : TEST_I32<(outs), + (ins lda_slot:$lda, alu_slot:$alu, mov_slot:$mov)> { + bits<10> lda; + bits<8> alu; + bits<10> mov; + let instr32 = {lda, alu, mov, 0b1110}; +} + diff --git a/llvm/unittests/Target/AIE/BundleTest.cpp b/llvm/unittests/Target/AIE/BundleTest.cpp index 575fa28bfad3..924ee1d2b561 100644 --- a/llvm/unittests/Target/AIE/BundleTest.cpp +++ b/llvm/unittests/Target/AIE/BundleTest.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. 
or its affiliates // //===----------------------------------------------------------------------===// @@ -42,10 +42,10 @@ const VLIWFormat FormatData[] = { {0, nullptr, {}, 0, 0}}; const PacketFormats MyFormats{FormatData}; -constexpr MCSlotInfo SlotInfos[4] = {{"ALU", 1, 0b0001, 0}, - {"MV", 1, 0b0010, 0}, - {"ST", 1, 0b0100, 0}, - {"LNG", 1, 0b1000, 0}}; +constexpr MCSlotInfo SlotInfos[4] = {{"ALU", 1, 0b0001, 0b0001, 0}, + {"MV", 1, 0b0010, 0b0010, 0}, + {"ST", 1, 0b0100, 0b0100, 0}, + {"LNG", 1, 0b1000, 0b1000, 0}}; // Stay clear of standard opcodes static const int FirstOpcode = 1000; diff --git a/llvm/utils/TableGen/CodeGenFormat.cpp b/llvm/utils/TableGen/CodeGenFormat.cpp index f87c236700e9..14f46b62ca21 100644 --- a/llvm/utils/TableGen/CodeGenFormat.cpp +++ b/llvm/utils/TableGen/CodeGenFormat.cpp @@ -109,6 +109,8 @@ void CodeGenFormat::run(raw_ostream &o) { assert(Slots.size() != 0 && "no Slot detected"); + computeSlotSets(Slots, InstFormats); + o << "#ifdef GET_FORMATS_SLOTKINDS\n" "#undef GET_FORMATS_SLOTKINDS\n\n"; Slots.emitTargetSlotKindEnum(o); @@ -207,7 +209,6 @@ void CodeGenFormat::run(raw_ostream &o) { for (TGInstrLayout &Inst : InstFormats) if (Inst.isPacketFormat()) { - Inst.computeSlotSet(); Packets.push_back(&Inst); } @@ -244,6 +245,47 @@ void CodeGenFormat::run(raw_ostream &o) { } } +void CodeGenFormat::computeSlotSets(TGTargetSlots &Slots, + std::vector &InstFormats) { + + // The slots accommodated by each format. + for (TGInstrLayout &Format : InstFormats) { + if (Format.isPacketFormat()) { + Format.computeSlotSet(); + } + } + + // The universe of all slots + uint64_t AllSlots = 0; + for (const auto &[_, Slot] : Slots) { + if (Slot.isDefaultSlot() || Slot.isArtificial()) { + continue; + } + AllSlots |= Slot.getSlotBits(); + } + + // Compute the conflict bits of all slots. 
+ for (unsigned S = 0; S < Slots.size(); S++) { + // Effectively we check whether a slot cannot be combined with another + // slot in any format in which the first is accommodated. We start out + // with all slots excluded, and eliminate the exclusion when a format is + // encountered that allows both ThisSlot and the exclusion. + auto &Slot = Slots.getSlot(S).second; + const uint64_t ThisSlot = Slot.getSlotBits(); + uint64_t Excluded = AllSlots; + for (TGInstrLayout &Format : InstFormats) { + if (!Format.isPacketFormat()) { + continue; + } + uint64_t SlotSet = Format.getSlotSet(); + if (SlotSet & ThisSlot) { + Excluded &= ~SlotSet & AllSlots; + } + } + Slot.setConflictBits(ThisSlot | Excluded); + } +} + // Retrieve the number of consecutive bits (from BitPos) that are part of the // same "variable" (described by "VarName") // NOTE0: if the bit at BitPos isn't a variable bit, then we simply return 0 @@ -1093,7 +1135,8 @@ void TGTargetSlots::emitSlotsInfoInstantiation( << " " << TS.getSlotSize() << ",\n" // Right now, we're using the slot num as SlotSet - << " " << (uint64_t(1) << TS.getNumSlot()) << ",\n" + << " " << TS.getSlotBits() << ",\n" + << " " << TS.getConflictBits() << ",\n" << " " << NOPName << "\n" << "},\n"; } diff --git a/llvm/utils/TableGen/CodeGenFormat.h b/llvm/utils/TableGen/CodeGenFormat.h index 2a4d46793fcb..603c82f82eee 100644 --- a/llvm/utils/TableGen/CodeGenFormat.h +++ b/llvm/utils/TableGen/CodeGenFormat.h @@ -129,9 +129,11 @@ class CodeGenFormat { void run(raw_ostream &o); static unsigned getVariableBits(const std::string &VarName, - const BitsInit *BI, unsigned posBit); + const BitsInit *BI, unsigned BitPos); static unsigned getFixedBits(std::string &OutChunck, const BitsInit *BI, - unsigned posBit); + unsigned BitPos); + static void computeSlotSets(TGTargetSlots &Slots, + std::vector &InstFormats); }; /// Main class abstracting a CodeGenInstruction (CGI). 
@@ -269,13 +271,28 @@ class TGTargetSlot { std::string SlotName; // The namespace of the current instruction std::string Namespace; - // primarely used when emitting the C++ enumeration constant + // primarily used when emitting the C++ enumeration constant std::string EnumerationString; // Name of the field to find in the hierarchy std::string FieldToFind; + // Artificial slots should not be considered for resource estimates + bool Artificial = false; // Unique number attributed (in the pool) for the slot. // It is used to generate a unique "SlotSet". int NumSlot; + + // The Slot bits for this slot. These are the pristine bits that correspond + // to the slots a format accommodates. + uint64_t SlotBits = 0; + + // The conflict bits for this slot. These are used to prevent format table + // searches as much as possible. It normally duplicates the SlotBit, since + // that is a trivial conflict. + // However, there may be more bits set. For example, XM conflicts with + // XM, X and M. + // Note that the scoreboard runs on these conflict bits. + uint64_t ConflictBits; + // Size of the Slot, in bits. // The storage of the size is made using an integer value as the size could // be negative. In this case, the slot pool (TGTargetSlots) will consider @@ -296,6 +313,7 @@ class TGTargetSlot { SlotName(SlotRecord->getValueAsString("SlotName")), Namespace(SlotRecord->getValueAsString("Namespace")), FieldToFind(SlotRecord->getValueAsString("FieldToFind")), + Artificial(SlotRecord->getValueAsBit("Artificial")), // NumSlot is defined as -1 for now. // When the pool of slots will be finalized, then the final ID will be // attributed. 
@@ -349,12 +367,21 @@ class TGTargetSlot { const std::string &getLabelToFind() const { return FieldToFind; } unsigned getSlotSize() const { return static_cast(SlotSize); } + uint64_t getSlotBits() const { return SlotBits; } + uint64_t getConflictBits() const { return ConflictBits; } const std::string &getEnumerationString() const { return EnumerationString; } bool isDefaultSlot() const { return IsDefaultSlot; } + bool isArtificial() const { return Artificial; } + void setConflictBits(uint64_t Bits) { ConflictBits = Bits; } private: - void setNumSlot(int SlotID) { NumSlot = SlotID; } + void setNumSlot(int SlotID) { + NumSlot = SlotID; + const uint64_t Base = 1; + SlotBits = Base << SlotID; + } + void setNopInst(const std::string &NOPName) { NOPInstrName = NOPName; } }; @@ -393,6 +420,8 @@ class TGTargetSlots { /// Add the Slot (if legal) in the Slot pool bool addSlot(const Record *const R); + RecordSlot &getSlot(unsigned N) { return Slots[N]; } + /// This method performs a finalization of the slot pool. 
This finalization /// is composed of 2 stages: the attribution of a unique ID for each slot and /// then the sorting of the internal container based on their ID, in order to From 4bac810ca165ad012abb9d15dd71ab0d93031c35 Mon Sep 17 00:00:00 2001 From: Martien de Jong Date: Tue, 5 Aug 2025 15:31:13 +0200 Subject: [PATCH 2/4] [AIE] Use ConflictBits in bundle conflict test --- llvm/lib/Target/AIE/AIEBundle.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AIE/AIEBundle.h b/llvm/lib/Target/AIE/AIEBundle.h index a4852e2710dc..c36da6673fd9 100644 --- a/llvm/lib/Target/AIE/AIEBundle.h +++ b/llvm/lib/Target/AIE/AIEBundle.h @@ -91,10 +91,15 @@ template class Bundle { // Verify there is a format that can accommodate the new slots MCSlotKind Slot = FormatInterface->getSlotKind(InstOpCode); assert(Slot != MCSlotKind()); - SlotBits NewSlots = FormatInterface->getSlotInfo(Slot)->getSlotSet(); - return (OccupiedSlots & NewSlots) == 0 && - FormatInterface->getPacketFormats().getFormat(OccupiedSlots | - NewSlots); + + auto *SlotInfo = FormatInterface->getSlotInfo(Slot); + // ConflictBits is a fast predictor of missing formats + SlotBits ConflictBits = SlotInfo->getConflictSet(); + if (OccupiedSlots & ConflictBits) { + return false; + } + SlotBits NewSlots = OccupiedSlots | SlotInfo->getSlotSet(); + return FormatInterface->getPacketFormats().getFormat(NewSlots); } /// Add an instruction to the bundle From 3a9001d5403bc306f32ee98a21cdfc5d36e205a9 Mon Sep 17 00:00:00 2001 From: Martien de Jong Date: Tue, 5 Aug 2025 17:51:16 +0200 Subject: [PATCH 3/4] [AIE] Use ConflictBits in HazardRecognizer conflict interfaces --- llvm/lib/Target/AIE/AIEHazardRecognizer.cpp | 44 +++++++--- llvm/lib/Target/AIE/AIEHazardRecognizer.h | 17 ++-- .../AIE/aie2/schedule/conflict_x_m_xm.mir | 80 +++++++++++++++++++ .../Target/AIE/HazardRecognizerTest.cpp | 2 +- 4 files changed, 124 insertions(+), 19 deletions(-) create mode 100644 
llvm/test/CodeGen/AIE/aie2/schedule/conflict_x_m_xm.mir diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp index 885c40667465..636a36cf996f 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp @@ -61,7 +61,8 @@ void FuncUnitWrapper::setFormatInterface(const AIEBaseMCFormats *Formats) { bool FuncUnitWrapper::operator==(const FuncUnitWrapper &Other) const { return Required == Other.Required && Reserved == Other.Reserved && - Slots == Other.Slots && MemoryBanks == Other.MemoryBanks && + Slots == Other.Slots && Conflicts == Other.Conflicts && + MemoryBanks == Other.MemoryBanks && MemObjectsBits == Other.MemObjectsBits; } @@ -95,6 +96,7 @@ void FuncUnitWrapper::clearResources() { Required.clear(); Reserved.clear(); Slots = 0; + Conflicts = 0; MemoryBanks = 0; MemObjectsBits = 0; } @@ -117,6 +119,7 @@ FuncUnitWrapper &FuncUnitWrapper::operator|=(const FuncUnitWrapper &Other) { Required |= Other.Required; Reserved |= Other.Reserved; Slots |= Other.Slots; + Conflicts |= Other.Conflicts; MemoryBanks |= Other.MemoryBanks; MemObjectsBits |= Other.MemObjectsBits; return *this; @@ -125,6 +128,7 @@ FuncUnitWrapper &FuncUnitWrapper::operator|=(const FuncUnitWrapper &Other) { bool FuncUnitWrapper::conflict(const FuncUnitWrapper &Other) const { if ((Slots & Other.Slots) != 0 || (MemoryBanks & Other.MemoryBanks) != 0 || (MemObjectsBits & Other.MemObjectsBits) != 0 || + (Conflicts & Other.Slots) != 0 || (Slots & Other.Conflicts) != 0 || Required.overlap(Other.Required) || Reserved.overlap(Other.Required) || Required.overlap(Other.Reserved)) { @@ -482,6 +486,15 @@ static SlotBits getSlotSet(const MCInstrDesc &Desc, return IgnoreUnkownSlotSets ? 
0 : ~0; } +static SlotBits getConflictSet(const MCInstrDesc &Desc, + const AIEBaseMCFormats &Formats) { + MCSlotKind SlotKind = Formats.getSlotKind(Desc.getOpcode()); + if (SlotKind != MCSlotKind()) + return Formats.getSlotInfo(SlotKind)->getConflictSet(); + + return 0; +} + namespace { auto toHazardType(bool Conflict) { return Conflict ? ScheduleHazardRecognizer::NoopHazard @@ -525,8 +538,9 @@ ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType( return toHazardType(checkConflict( TheScoreboard, ItinData, SchedClass, getSlotSet(Desc, *TII->getFormatInterface(), IgnoreUnknownSlotSets), - MemoryBanks, MemObjectsBits, TII->getMemoryCycles(SchedClass), - DeltaCycles, FUDepthLimit)); + getConflictSet(Desc, *TII->getFormatInterface()), MemoryBanks, + MemObjectsBits, TII->getMemoryCycles(SchedClass), DeltaCycles, + FUDepthLimit)); } bool AIEHazardRecognizer::checkConflict( @@ -540,25 +554,28 @@ bool AIEHazardRecognizer::checkConflict( return checkConflict( Scoreboard, ItinData, SchedClass, getSlotSet(Desc, *TII->getFormatInterface(), IgnoreUnknownSlotSets), - MemoryBanks, MemObjectsBits, TII->getMemoryCycles(SchedClass), - DeltaCycles, std::nullopt); + getConflictSet(Desc, *TII->getFormatInterface()), MemoryBanks, + MemObjectsBits, TII->getMemoryCycles(SchedClass), DeltaCycles, + std::nullopt); } bool AIEHazardRecognizer::checkConflict( const ResourceScoreboard &Scoreboard, const InstrItineraryData *ItinData, unsigned SchedClass, SlotBits SlotSet, - MemoryBankBits MemoryBanks, MemoryObjectsBits MemObjectsBits, - SmallVector MemoryAccessCycles, int DeltaCycles, - std::optional FUDepthLimit) { + SlotBits ConflictSet, MemoryBankBits MemoryBanks, + MemoryObjectsBits MemObjectsBits, SmallVector MemoryAccessCycles, + int DeltaCycles, std::optional FUDepthLimit) { // Verify format hazards - FuncUnitWrapper EmissionCycle(SlotSet); + FuncUnitWrapper EmissionCycle(SlotSet, ConflictSet); if (EmissionCycle.conflict(Scoreboard[DeltaCycles])) return true; // Verify 
memory bank and shared object hazards if (!MemoryAccessCycles.empty()) { - FuncUnitWrapper MemoryAccessCycle(/*SlotSet=*/0, MemoryBanks, + const SlotBits Slots = 0; + const SlotBits Conflicts = 0; + FuncUnitWrapper MemoryAccessCycle(Slots, Conflicts, MemoryBanks, MemObjectsBits); for (auto Cycles : MemoryAccessCycles) { @@ -644,13 +661,16 @@ void AIEHazardRecognizer::enterResources( std::optional FUDepthLimit) { // Append slot usage - FuncUnitWrapper EmissionCycle(SlotSet); + const SlotBits Conflicts = 0; + FuncUnitWrapper EmissionCycle(SlotSet, Conflicts); Scoreboard[DeltaCycles] |= EmissionCycle; // Append memory bank usage if (!MemoryAccessCycles.empty()) { + const SlotBits Slots = 0; + const SlotBits Conflicts = 0; FuncUnitWrapper MemoryBankAndObjectsAccessCycle( - /*SlotSet=*/0, MemoryBanks, MemObjectsBits); + Slots, Conflicts, MemoryBanks, MemObjectsBits); for (auto Cycles : MemoryAccessCycles) { assert(Scoreboard.isInRange(DeltaCycles + Cycles - 1)); Scoreboard[DeltaCycles + Cycles - 1] |= MemoryBankAndObjectsAccessCycle; diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.h b/llvm/lib/Target/AIE/AIEHazardRecognizer.h index c924e857b963..d145a8423bb6 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.h +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.h @@ -72,6 +72,9 @@ class FuncUnitWrapper { /// The occupied slots. This is currently redundant with Bundle SlotBits Slots = 0; + /// Conflicts are just for speeding up conflict detection. They may be present + /// in cycles to be merged, but they will not be merged into the scoreboard. 
+ SlotBits Conflicts = 0; /// The occupied bank MemoryBankBits MemoryBanks = 0; @@ -105,10 +108,11 @@ class FuncUnitWrapper { /// Make this conflict with any non-empty cycle void blockResources(); FuncUnitWrapper() = default; - FuncUnitWrapper(SlotBits Slots, MemoryBankBits MemoryBanks = 0, + FuncUnitWrapper(SlotBits Slots, SlotBits Conflicts, + MemoryBankBits MemoryBanks = 0, MemoryObjectsBits MemObjectsBits = 0) - : Slots(Slots), MemoryBanks(MemoryBanks), MemObjectsBits(MemObjectsBits) { - } + : Slots(Slots), Conflicts(Conflicts), MemoryBanks(MemoryBanks), + MemObjectsBits(MemObjectsBits) {} /// Compare two FuncUnitWrappers for equality. This is only used for /// dumping purposes, quite literally saying "this looks the same" @@ -268,9 +272,10 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer { static bool checkConflict(const ResourceScoreboard &Scoreboard, const InstrItineraryData *ItinData, unsigned SchedClass, - SlotBits SlotSet, MemoryBankBits MemoryBanks, - uint64_t MemObjectsBits, SmallVector MemoryAccessCycles, - int DeltaCycles, std::optional FUDepthLimit); + SlotBits SlotSet, SlotBits Conflicts, + MemoryBankBits MemoryBanks, uint64_t MemObjectsBits, + SmallVector MemoryAccessCycles, int DeltaCycles, + std::optional FUDepthLimit); static void enterResources(ResourceScoreboard &Scoreboard, const InstrItineraryData *ItinData, diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/conflict_x_m_xm.mir b/llvm/test/CodeGen/AIE/aie2/schedule/conflict_x_m_xm.mir new file mode 100644 index 000000000000..fbc049cee6e9 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2/schedule/conflict_x_m_xm.mir @@ -0,0 +1,80 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. 
or its affiliates + +# RUN: llc -mtriple=aie2 -run-pass=postmisched %s -o - | FileCheck %s + + +# This is an explicit test for the optimized slot conflict detection +# On aie2, this centers around XM conflicting with X and M, but not with others. +# also, X should not conflict with M +# The code below generates two bundles, one lda + lng, one alu + mv + +--- +name: f +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: f + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $r1 = MOVX_alu_cg 1 + ; CHECK-NEXT: $r2 = MOVXM_lng_cg 12345 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $r1 = MOV_mv_scl $r0 + ; CHECK-NEXT: $r2 = MOVXM_lng_cg 12345 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BUNDLE implicit-def $r1, implicit-def $r2 { + ; CHECK-NEXT: $r1 = MOVA_lda_cg 2 + ; CHECK-NEXT: $r2 = MOVXM_lng_cg 12345 + ; CHECK-NEXT: } + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: RET implicit $lr + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: BUNDLE implicit-def $r1, implicit-def $r2, implicit killed $r0 { + ; CHECK-NEXT: $r1 = MOVX_alu_cg 1 + ; CHECK-NEXT: $r2 = MOV_mv_scl killed $r0 + ; CHECK-NEXT: } + ; CHECK-NEXT: DelayedSchedBarrier implicit killed $r1, implicit killed $r2 + bb.0: + liveins: $r0 + $r1 = MOVX_alu_cg 1 + $r2 = MOVXM_lng_cg 12345 + + bb.1: + liveins: $r0 + $r1 = MOV_mv_scl $r0 + $r2 = MOVXM_lng_cg 12345 + + bb.2: + liveins: $r0 + $r1 = MOVA_lda_cg 2 + $r2 = MOVXM_lng_cg 12345 + + bb.3: + liveins: $r0 + $r1 = MOVX_alu_cg 1 + $r2 = MOV_mv_scl $r0 + + RET implicit $lr + DelayedSchedBarrier implicit $r1, implicit $r2 +... 
diff --git a/llvm/unittests/Target/AIE/HazardRecognizerTest.cpp b/llvm/unittests/Target/AIE/HazardRecognizerTest.cpp index e5b587d19e5c..efa0dc52e780 100644 --- a/llvm/unittests/Target/AIE/HazardRecognizerTest.cpp +++ b/llvm/unittests/Target/AIE/HazardRecognizerTest.cpp @@ -135,7 +135,7 @@ class MockHR : public AIEHazardRecognizer { bool hazard(unsigned SchedClass, int Delta, SlotBits SlotSet = 0, MemoryBankBits MemoryBanks = 0, MemoryObjectsBits ObjectsBits = 0, SmallVector MemoryAccessCycles = {}) { - return checkConflict(MockScoreboard, &Itins, SchedClass, SlotSet, + return checkConflict(MockScoreboard, &Itins, SchedClass, SlotSet, SlotSet, MemoryBanks, ObjectsBits, MemoryAccessCycles, Delta, std::nullopt); } From b5a52e5febc1538511e6a3cb1fd97ecea090f6e3 Mon Sep 17 00:00:00 2001 From: Martien de Jong Date: Fri, 15 Aug 2025 09:27:36 +0200 Subject: [PATCH 4/4] [AIE] Improved format availability check --- llvm/lib/Target/AIE/AIEBundle.h | 6 ++- llvm/lib/Target/AIE/AIEHazardRecognizer.cpp | 2 +- .../Target/AIE/MCTargetDesc/AIE2MCFormats.cpp | 4 ++ .../AIE/MCTargetDesc/AIEBaseMCFormats.cpp | 5 +++ .../Target/AIE/MCTargetDesc/AIEMCFormats.cpp | 4 ++ .../Target/AIE/MCTargetDesc/AIEMCFormats.h | 7 ++++ .../AIE/MCTargetDesc/aie2p/AIE2PMCFormats.cpp | 4 ++ llvm/unittests/Target/AIE/BundleTest.cpp | 3 ++ llvm/utils/TableGen/CodeGenFormat.cpp | 37 +++++++++++++++++++ 9 files changed, 69 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AIE/AIEBundle.h b/llvm/lib/Target/AIE/AIEBundle.h index c36da6673fd9..7e949c80ed76 100644 --- a/llvm/lib/Target/AIE/AIEBundle.h +++ b/llvm/lib/Target/AIE/AIEBundle.h @@ -98,8 +98,10 @@ template class Bundle { if (OccupiedSlots & ConflictBits) { return false; } - SlotBits NewSlots = OccupiedSlots | SlotInfo->getSlotSet(); - return FormatInterface->getPacketFormats().getFormat(NewSlots); + const SlotBits NewSlots = OccupiedSlots | SlotInfo->getSlotSet(); + // Note: Now that we have the conflict bits we may no longer need this + // 
final check, but it is cheap and represents proven technology. + return FormatInterface->isFormatAvailable(NewSlots); } /// Add an instruction to the bundle diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp index 636a36cf996f..a16ae6154de1 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp @@ -139,7 +139,7 @@ bool FuncUnitWrapper::conflict(const FuncUnitWrapper &Other) const { // This allows representing a blocked cycle (Slots = ~0) without knowing // the slot and format details. return Slots && Other.Slots && - !FormatInterface->getPacketFormats().getFormat(Slots | Other.Slots); + !FormatInterface->isFormatAvailable(Slots | Other.Slots); } namespace { diff --git a/llvm/lib/Target/AIE/MCTargetDesc/AIE2MCFormats.cpp b/llvm/lib/Target/AIE/MCTargetDesc/AIE2MCFormats.cpp index 5f61994282d4..be62b5711c49 100644 --- a/llvm/lib/Target/AIE/MCTargetDesc/AIE2MCFormats.cpp +++ b/llvm/lib/Target/AIE/MCTargetDesc/AIE2MCFormats.cpp @@ -35,6 +35,10 @@ const MCFormatDesc *AIE2MCFormats::getMCFormats() const { const PacketFormats &AIE2MCFormats::getPacketFormats() const { return Formats; } +ArrayRef AIE2MCFormats::getIsFormatAvailable() const { + return FormatAvailable; +} + SmallVector AIE2MCFormats::getLoadSlotKinds() const { return {MCSlotKind::AIE2_SLOT_LDB, MCSlotKind::AIE2_SLOT_LDA}; } diff --git a/llvm/lib/Target/AIE/MCTargetDesc/AIEBaseMCFormats.cpp b/llvm/lib/Target/AIE/MCTargetDesc/AIEBaseMCFormats.cpp index d18792d7c116..c986aab70d75 100644 --- a/llvm/lib/Target/AIE/MCTargetDesc/AIEBaseMCFormats.cpp +++ b/llvm/lib/Target/AIE/MCTargetDesc/AIEBaseMCFormats.cpp @@ -58,6 +58,11 @@ bool AIEBaseMCFormats::isSupportedInstruction(unsigned int Opcode) const { return getFormatDescIndex(Opcode) ? 
true : false; } +bool AIEBaseMCFormats::isFormatAvailable(uint64_t SlotSet) const { + ArrayRef IsFormatsAvailable = getIsFormatAvailable(); + return SlotSet < IsFormatsAvailable.size() && IsFormatsAvailable[SlotSet]; +} + const MCSlotKind AIEBaseMCFormats::getSlotKind(unsigned int Opcode) const { // First, we check that the instruction has a format defined. // Some KILLs instructions are still in the pipeline for example... diff --git a/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.cpp b/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.cpp index 84aaa2a079d2..ae6863890d1f 100644 --- a/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.cpp +++ b/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.cpp @@ -62,4 +62,8 @@ const MCFormatDesc *AIEMCFormats::getMCFormats() const { return AIE::Formats; } const PacketFormats &AIEMCFormats::getPacketFormats() const { return Formats; } +ArrayRef AIEMCFormats::getIsFormatAvailable() const { + return FormatAvailable; +} + } // end namespace llvm diff --git a/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.h b/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.h index c144351744a6..32b4c1e59cbc 100644 --- a/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.h +++ b/llvm/lib/Target/AIE/MCTargetDesc/AIEMCFormats.h @@ -389,11 +389,15 @@ class AIEBaseMCFormats { virtual const PacketFormats &getPacketFormats() const = 0; + virtual ArrayRef getIsFormatAvailable() const = 0; + // \return all Slots that correspond to the load instructions virtual SmallVector getLoadSlotKinds() const { llvm_unreachable("Target didn't implement getLoadSlotKinds()"); } + bool isFormatAvailable(uint64_t SlotSet) const; + protected: /// Check if the Instruction is indeed into the Tables. 
void checkInstructionIsSupported(unsigned int Opcode) const; @@ -407,6 +411,7 @@ class AIEMCFormats : public AIEBaseMCFormats { getFormatDescIndex(unsigned int Opcode) const override; const MCSlotInfo *getSlotInfo(const MCSlotKind Kind) const override; const MCFormatDesc *getMCFormats() const override; + ArrayRef getIsFormatAvailable() const override; const PacketFormats &getPacketFormats() const override; }; @@ -419,6 +424,7 @@ class AIE2MCFormats : public AIEBaseMCFormats { const MCSlotInfo *getSlotInfo(const MCSlotKind Kind) const override; const MCFormatDesc *getMCFormats() const override; const PacketFormats &getPacketFormats() const override; + ArrayRef getIsFormatAvailable() const override; SmallVector getLoadSlotKinds() const override; }; @@ -431,6 +437,7 @@ class AIE2PMCFormats : public AIEBaseMCFormats { const MCSlotInfo *getSlotInfo(const MCSlotKind Kind) const override; const MCFormatDesc *getMCFormats() const override; const PacketFormats &getPacketFormats() const override; + ArrayRef getIsFormatAvailable() const override; SmallVector getLoadSlotKinds() const override; }; diff --git a/llvm/lib/Target/AIE/MCTargetDesc/aie2p/AIE2PMCFormats.cpp b/llvm/lib/Target/AIE/MCTargetDesc/aie2p/AIE2PMCFormats.cpp index a9cbc333dc29..a272a51d1b94 100644 --- a/llvm/lib/Target/AIE/MCTargetDesc/aie2p/AIE2PMCFormats.cpp +++ b/llvm/lib/Target/AIE/MCTargetDesc/aie2p/AIE2PMCFormats.cpp @@ -37,6 +37,10 @@ const PacketFormats &AIE2PMCFormats::getPacketFormats() const { return Formats; } +ArrayRef AIE2PMCFormats::getIsFormatAvailable() const { + return FormatAvailable; +} + SmallVector AIE2PMCFormats::getLoadSlotKinds() const { return {MCSlotKind::AIE2P_SLOT_LDB, MCSlotKind::AIE2P_SLOT_LDA}; } diff --git a/llvm/unittests/Target/AIE/BundleTest.cpp b/llvm/unittests/Target/AIE/BundleTest.cpp index 924ee1d2b561..aa3782dff246 100644 --- a/llvm/unittests/Target/AIE/BundleTest.cpp +++ b/llvm/unittests/Target/AIE/BundleTest.cpp @@ -99,6 +99,9 @@ class MockMCFormats : public 
AIEBaseMCFormats { const MCFormatDesc *getMCFormats() const override { llvm_unreachable("Un-implemented"); } + ArrayRef getIsFormatAvailable() const override { + llvm_unreachable("Un-implemented"); + } }; class MockTII : public AIEInstrInfo { diff --git a/llvm/utils/TableGen/CodeGenFormat.cpp b/llvm/utils/TableGen/CodeGenFormat.cpp index 14f46b62ca21..08473d05ef57 100644 --- a/llvm/utils/TableGen/CodeGenFormat.cpp +++ b/llvm/utils/TableGen/CodeGenFormat.cpp @@ -240,6 +240,43 @@ void CodeGenFormat::run(raw_ostream &o) { o << SlotData << FormatData << "static const PacketFormats Formats {FormatData};\n\n"; + // Create an O(1) function of available formats by ticking them in + // a lookup table; + uint64_t MaxSlotSet = 0; + for (const TGInstrLayout *Packet : Packets) { + MaxSlotSet = std::max(MaxSlotSet, Packet->getSlotSet()); + } + const size_t Size = MaxSlotSet + 1; + // Catering for 16 slots for now, just to check the logic. + assert(Size <= 65536); + std::vector LUT(Size, false); + // First the trivial ones, without nops. + for (const TGInstrLayout *Packet : Packets) { + LUT[Packet->getSlotSet()] = true; + } + // Then the remaining ones + for (uint64_t SlotSet = 0; SlotSet < Size; SlotSet++) { + if (LUT[SlotSet]) { + continue; + } + // We start with the largest packets, since they cover more. + for (const TGInstrLayout *Packet : reverse(Packets)) { + // If the packet clears all bits of SlotSet, it can hold all these + // slots. Slots that are not present in SlotSet can be filled + // with nops. + if ((SlotSet & ~Packet->getSlotSet()) == 0) { + LUT[SlotSet] = true; + break; + } + } + } + o << "const size_t SlotSetSize = " << Size << ";\n"; + o << "static const bool FormatAvailable[SlotSetSize] = {\n"; + for (size_t Index = 0; Index < Size; Index++) { + o << "\t/* " << Index << " */ " << LUT[Index] << ",\n"; + } + o << "};\n"; + o << "#endif // GET_FORMATS_PACKETS_TABLE\n\n"; } }