-
Notifications
You must be signed in to change notification settings - Fork 30
[NFC][AIE] Streamline the format checks using Conflict bits. #704
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: aie-public
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -91,10 +91,17 @@ template <class I> class Bundle { | |
| // Verify there is a format that can accommodate the new slots | ||
| MCSlotKind Slot = FormatInterface->getSlotKind(InstOpCode); | ||
| assert(Slot != MCSlotKind()); | ||
| SlotBits NewSlots = FormatInterface->getSlotInfo(Slot)->getSlotSet(); | ||
| return (OccupiedSlots & NewSlots) == 0 && | ||
| FormatInterface->getPacketFormats().getFormat(OccupiedSlots | | ||
| NewSlots); | ||
|
|
||
| auto *SlotInfo = FormatInterface->getSlotInfo(Slot); | ||
| // ConflictBits is a fast predictor of missing formats | ||
| SlotBits ConflictBits = SlotInfo->getConflictSet(); | ||
| if (OccupiedSlots & ConflictBits) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wouldn't the OccupiedSlots be the ConflictBits of a Bundle?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think all your comments are covered by this one answer. We keep Slots since they represent the true base class from which the instructions derive. Each instruction has exactly one slot. These are also the slots that are listed in the ISA, in the formats, they define the operands of the format instructions, etc. |
||
| return false; | ||
| } | ||
| const SlotBits NewSlots = OccupiedSlots | SlotInfo->getSlotSet(); | ||
| // Note: Now that we have the conflict bits we may no longer need this | ||
| // final check, but it is cheap and represents proven technology. | ||
| return FormatInterface->isFormatAvailable(NewSlots); | ||
| } | ||
|
|
||
| /// Add an instruction to the bundle | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -61,7 +61,8 @@ void FuncUnitWrapper::setFormatInterface(const AIEBaseMCFormats *Formats) { | |
|
|
||
| bool FuncUnitWrapper::operator==(const FuncUnitWrapper &Other) const { | ||
| return Required == Other.Required && Reserved == Other.Reserved && | ||
| Slots == Other.Slots && MemoryBanks == Other.MemoryBanks && | ||
| Slots == Other.Slots && Conflicts == Other.Conflicts && | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we have to check slots and conflicts separately? |
||
| MemoryBanks == Other.MemoryBanks && | ||
| MemObjectsBits == Other.MemObjectsBits; | ||
| } | ||
|
|
||
|
|
@@ -95,6 +96,7 @@ void FuncUnitWrapper::clearResources() { | |
| Required.clear(); | ||
| Reserved.clear(); | ||
| Slots = 0; | ||
| Conflicts = 0; | ||
| MemoryBanks = 0; | ||
| MemObjectsBits = 0; | ||
| } | ||
|
|
@@ -117,6 +119,7 @@ FuncUnitWrapper &FuncUnitWrapper::operator|=(const FuncUnitWrapper &Other) { | |
| Required |= Other.Required; | ||
| Reserved |= Other.Reserved; | ||
| Slots |= Other.Slots; | ||
| Conflicts |= Other.Conflicts; | ||
| MemoryBanks |= Other.MemoryBanks; | ||
| MemObjectsBits |= Other.MemObjectsBits; | ||
| return *this; | ||
|
|
@@ -125,6 +128,7 @@ FuncUnitWrapper &FuncUnitWrapper::operator|=(const FuncUnitWrapper &Other) { | |
| bool FuncUnitWrapper::conflict(const FuncUnitWrapper &Other) const { | ||
| if ((Slots & Other.Slots) != 0 || (MemoryBanks & Other.MemoryBanks) != 0 || | ||
| (MemObjectsBits & Other.MemObjectsBits) != 0 || | ||
| (Conflicts & Other.Slots) != 0 || (Slots & Other.Conflicts) != 0 || | ||
| Required.overlap(Other.Required) || Reserved.overlap(Other.Required) || | ||
| Required.overlap(Other.Reserved)) { | ||
|
|
||
|
|
@@ -135,7 +139,7 @@ bool FuncUnitWrapper::conflict(const FuncUnitWrapper &Other) const { | |
| // This allows representing a blocked cycle (Slots = ~0) without knowing | ||
| // the slot and format details. | ||
| return Slots && Other.Slots && | ||
| !FormatInterface->getPacketFormats().getFormat(Slots | Other.Slots); | ||
| !FormatInterface->isFormatAvailable(Slots | Other.Slots); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why aren't we checking conflict bits here?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we are, at line 131, returning early if we find a conflict. |
||
| } | ||
|
|
||
| namespace { | ||
|
|
@@ -482,6 +486,15 @@ static SlotBits getSlotSet(const MCInstrDesc &Desc, | |
| return IgnoreUnkownSlotSets ? 0 : ~0; | ||
| } | ||
|
|
||
| static SlotBits getConflictSet(const MCInstrDesc &Desc, | ||
| const AIEBaseMCFormats &Formats) { | ||
| MCSlotKind SlotKind = Formats.getSlotKind(Desc.getOpcode()); | ||
| if (SlotKind != MCSlotKind()) | ||
| return Formats.getSlotInfo(SlotKind)->getConflictSet(); | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| namespace { | ||
| auto toHazardType(bool Conflict) { | ||
| return Conflict ? ScheduleHazardRecognizer::NoopHazard | ||
|
|
@@ -525,8 +538,9 @@ ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType( | |
| return toHazardType(checkConflict( | ||
| TheScoreboard, ItinData, SchedClass, | ||
| getSlotSet(Desc, *TII->getFormatInterface(), IgnoreUnknownSlotSets), | ||
| MemoryBanks, MemObjectsBits, TII->getMemoryCycles(SchedClass), | ||
| DeltaCycles, FUDepthLimit)); | ||
| getConflictSet(Desc, *TII->getFormatInterface()), MemoryBanks, | ||
| MemObjectsBits, TII->getMemoryCycles(SchedClass), DeltaCycles, | ||
| FUDepthLimit)); | ||
| } | ||
|
|
||
| bool AIEHazardRecognizer::checkConflict( | ||
|
|
@@ -540,25 +554,28 @@ bool AIEHazardRecognizer::checkConflict( | |
| return checkConflict( | ||
| Scoreboard, ItinData, SchedClass, | ||
| getSlotSet(Desc, *TII->getFormatInterface(), IgnoreUnknownSlotSets), | ||
| MemoryBanks, MemObjectsBits, TII->getMemoryCycles(SchedClass), | ||
| DeltaCycles, std::nullopt); | ||
| getConflictSet(Desc, *TII->getFormatInterface()), MemoryBanks, | ||
| MemObjectsBits, TII->getMemoryCycles(SchedClass), DeltaCycles, | ||
| std::nullopt); | ||
| } | ||
|
|
||
| bool AIEHazardRecognizer::checkConflict( | ||
| const ResourceScoreboard<FuncUnitWrapper> &Scoreboard, | ||
| const InstrItineraryData *ItinData, unsigned SchedClass, SlotBits SlotSet, | ||
| MemoryBankBits MemoryBanks, MemoryObjectsBits MemObjectsBits, | ||
| SmallVector<int, 2> MemoryAccessCycles, int DeltaCycles, | ||
| std::optional<int> FUDepthLimit) { | ||
| SlotBits ConflictSet, MemoryBankBits MemoryBanks, | ||
| MemoryObjectsBits MemObjectsBits, SmallVector<int, 2> MemoryAccessCycles, | ||
| int DeltaCycles, std::optional<int> FUDepthLimit) { | ||
|
|
||
| // Verify format hazards | ||
| FuncUnitWrapper EmissionCycle(SlotSet); | ||
| FuncUnitWrapper EmissionCycle(SlotSet, ConflictSet); | ||
| if (EmissionCycle.conflict(Scoreboard[DeltaCycles])) | ||
| return true; | ||
|
|
||
| // Verify memory bank and shared object hazards | ||
| if (!MemoryAccessCycles.empty()) { | ||
| FuncUnitWrapper MemoryAccessCycle(/*SlotSet=*/0, MemoryBanks, | ||
| const SlotBits Slots = 0; | ||
| const SlotBits Conflicts = 0; | ||
| FuncUnitWrapper MemoryAccessCycle(Slots, Conflicts, MemoryBanks, | ||
| MemObjectsBits); | ||
|
|
||
| for (auto Cycles : MemoryAccessCycles) { | ||
|
|
@@ -644,13 +661,16 @@ void AIEHazardRecognizer::enterResources( | |
| std::optional<int> FUDepthLimit) { | ||
|
|
||
| // Append slot usage | ||
| FuncUnitWrapper EmissionCycle(SlotSet); | ||
| const SlotBits Conflicts = 0; | ||
| FuncUnitWrapper EmissionCycle(SlotSet, Conflicts); | ||
| Scoreboard[DeltaCycles] |= EmissionCycle; | ||
|
|
||
| // Append memory bank usage | ||
| if (!MemoryAccessCycles.empty()) { | ||
| const SlotBits Slots = 0; | ||
| const SlotBits Conflicts = 0; | ||
| FuncUnitWrapper MemoryBankAndObjectsAccessCycle( | ||
| /*SlotSet=*/0, MemoryBanks, MemObjectsBits); | ||
| Slots, Conflicts, MemoryBanks, MemObjectsBits); | ||
| for (auto Cycles : MemoryAccessCycles) { | ||
| assert(Scoreboard.isInRange(DeltaCycles + Cycles - 1)); | ||
| Scoreboard[DeltaCycles + Cycles - 1] |= MemoryBankAndObjectsAccessCycle; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| // (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates | ||
| // (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // Utility classes to interface the generated Formats from CodeGenFormat with | ||
|
|
@@ -93,18 +93,23 @@ class MCSlotInfo { | |
| const char *SlotName; | ||
| /// Size of the slot (in bits) | ||
| const unsigned Size; | ||
| /// Bitset representing the occupancy of the slot | ||
| /// Bitset representing the occupancy of the slots | ||
| const SlotBits SlotOccupancy; | ||
| /// Opcode of the NOP inst. attached to the slot | ||
| /// The closure of SlotOccupancy with the computed exclusions, | ||
| /// e.g. XM implies X and M | ||
| const SlotBits ConflictBits; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we need a new |
||
| /// Opcode of the NOP instruction attached to the slot | ||
| const unsigned NopOpc; | ||
|
|
||
| public: | ||
| constexpr MCSlotInfo(const char *SlotName, unsigned Size, SlotBits Bits, | ||
| unsigned NopOpc) | ||
| : SlotName(SlotName), Size(Size), SlotOccupancy(Bits), NopOpc(NopOpc) {} | ||
| SlotBits ConflictBits, unsigned NopOpc) | ||
| : SlotName(SlotName), Size(Size), SlotOccupancy(Bits), | ||
| ConflictBits(ConflictBits), NopOpc(NopOpc) {} | ||
|
|
||
| const char *getName() const { return SlotName; } | ||
| SlotBits getSlotSet() const { return SlotOccupancy; } | ||
| SlotBits getConflictSet() const { return ConflictBits; } | ||
| unsigned getNOPOpcode() const { return NopOpc; } | ||
| unsigned getSize() const { return Size; } | ||
| }; | ||
|
|
@@ -384,11 +389,15 @@ class AIEBaseMCFormats { | |
|
|
||
| virtual const PacketFormats &getPacketFormats() const = 0; | ||
|
|
||
| virtual ArrayRef<bool> getIsFormatAvailable() const = 0; | ||
|
|
||
| // \return all Slots that correspond to the load instructions | ||
| virtual SmallVector<MCSlotKind, 2> getLoadSlotKinds() const { | ||
| llvm_unreachable("Target didn't implement getLoadSlotKinds()"); | ||
| } | ||
|
|
||
| bool isFormatAvailable(uint64_t SlotSet) const; | ||
|
|
||
| protected: | ||
| /// Check if the Instruction is indeed into the Tables. | ||
| void checkInstructionIsSupported(unsigned int Opcode) const; | ||
|
|
@@ -402,6 +411,7 @@ class AIEMCFormats : public AIEBaseMCFormats { | |
| getFormatDescIndex(unsigned int Opcode) const override; | ||
| const MCSlotInfo *getSlotInfo(const MCSlotKind Kind) const override; | ||
| const MCFormatDesc *getMCFormats() const override; | ||
| ArrayRef<bool> getIsFormatAvailable() const override; | ||
| const PacketFormats &getPacketFormats() const override; | ||
| }; | ||
|
|
||
|
|
@@ -414,6 +424,7 @@ class AIE2MCFormats : public AIEBaseMCFormats { | |
| const MCSlotInfo *getSlotInfo(const MCSlotKind Kind) const override; | ||
| const MCFormatDesc *getMCFormats() const override; | ||
| const PacketFormats &getPacketFormats() const override; | ||
| ArrayRef<bool> getIsFormatAvailable() const override; | ||
| SmallVector<MCSlotKind, 2> getLoadSlotKinds() const override; | ||
| }; | ||
|
|
||
|
|
@@ -426,6 +437,7 @@ class AIE2PMCFormats : public AIEBaseMCFormats { | |
| const MCSlotInfo *getSlotInfo(const MCSlotKind Kind) const override; | ||
| const MCFormatDesc *getMCFormats() const override; | ||
| const PacketFormats &getPacketFormats() const override; | ||
| ArrayRef<bool> getIsFormatAvailable() const override; | ||
| SmallVector<MCSlotKind, 2> getLoadSlotKinds() const override; | ||
| }; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is used for e.g. nop slots that are just added to drive format selection, but don't occupy existing slots.