Skip to content
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/LiveIntervals.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ class VirtRegMap;
LiveIntervals();
~LiveIntervals() override;

/// Expose the TargetInstrInfo referenced by this analysis so that clients
/// (e.g. register-allocation filter callbacks) can query it.
/// \returns a reference to the object pointed to by TII.
const TargetInstrInfo &getTargetInstrInfo() const {
  return *TII;
}

/// Calculate the spill weight to assign to a single instruction.
static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ namespace llvm {
///
FunctionPass *createGreedyRegisterAllocator();
FunctionPass *createGreedyRegisterAllocator(RegClassFilterFunc F);
FunctionPass *createGreedyRegisterAllocator(RegClassFilterFunc F,
LiveIntervalFilterFunc LIF);

/// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean
/// Quadratic Programming (PBQP) based register allocator.
Expand Down
15 changes: 15 additions & 0 deletions llvm/include/llvm/CodeGen/RegAllocCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ namespace llvm {
class TargetRegisterClass;
class TargetRegisterInfo;

class MachineRegisterInfo;
class TargetInstrInfo;
class LiveInterval;

/// Predicate over a register class. RegAllocBase::enqueue invokes it with the
/// class of the virtual register being considered and only enqueues the live
/// interval when the predicate returns true.
typedef std::function<bool(const TargetRegisterInfo &TRI,
const TargetRegisterClass &RC)> RegClassFilterFunc;

Expand All @@ -26,6 +30,17 @@ static inline bool allocateAllRegClasses(const TargetRegisterInfo &,
return true;
}

/// Predicate over a single live interval. RegAllocBase::enqueue consults it
/// after the register-class filter has accepted the register; the interval is
/// enqueued only when this predicate returns true, otherwise it is skipped.
typedef std::function<bool(MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const LiveInterval *LI)>
LiveIntervalFilterFunc;
/// Pass-through live interval filter used as the default for register
/// allocation: every live interval is accepted, so no interval is filtered
/// out. All arguments are ignored.
/// \returns true unconditionally.
static inline bool allocateAllLiveIntervals(MachineRegisterInfo & /*MRI*/,
                                            const TargetInstrInfo & /*TII*/,
                                            const LiveInterval * /*LI*/) {
  constexpr bool AcceptEveryInterval = true;
  return AcceptEveryInterval;
}

} // namespace llvm

#endif // LLVM_CODEGEN_REGALLOCCOMMON_H
9 changes: 7 additions & 2 deletions llvm/lib/CodeGen/RegAllocBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,13 @@ void RegAllocBase::enqueue(const LiveInterval *LI) {

const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
if (ShouldAllocateClass(*TRI, RC)) {
LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
enqueueImpl(LI);
if (ShouldAllocateLiveInterval(*MRI, LIS->getTargetInstrInfo(), LI)) {
LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
enqueueImpl(LI);
} else {
LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI)
<< " in skipped live interval\n");
}
} else {
LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI)
<< " in skipped register class\n");
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/CodeGen/RegAllocBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,17 @@ class RegAllocBase {
LiveRegMatrix *Matrix = nullptr;
RegisterClassInfo RegClassInfo;
const RegClassFilterFunc ShouldAllocateClass;
const LiveIntervalFilterFunc ShouldAllocateLiveInterval;

/// Inst which is a def of an original reg and whose defs are already all
/// dead after remat is saved in DeadRemats. The deletion of such inst is
/// postponed till all the allocations are done, so its remat expr is
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;

RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) :
ShouldAllocateClass(F) {}
RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses,
const LiveIntervalFilterFunc LIF = allocateAllLiveIntervals)
: ShouldAllocateClass(F), ShouldAllocateLiveInterval(LIF) {}

virtual ~RegAllocBase() = default;

Expand Down
10 changes: 7 additions & 3 deletions llvm/lib/CodeGen/RegAllocGreedy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,15 @@ FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegClassFilterFunc F):
MachineFunctionPass(ID),
RegAllocBase(F) {
/// Create a greedy register allocator pass that is restricted by two filters:
/// \p Ftor selects the register classes to allocate and \p LIFtor selects the
/// individual live intervals to allocate. Both are forwarded unchanged to the
/// RAGreedy constructor.
/// \returns the newly heap-allocated pass.
FunctionPass *
llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor,
                                    LiveIntervalFilterFunc LIFtor) {
  RAGreedy *Allocator = new RAGreedy(Ftor, LIFtor);
  return Allocator;
}

/// Construct the greedy allocator pass. The register-class filter \p F and the
/// live-interval filter \p LIF are handed to RegAllocBase, which stores them;
/// the constructor body itself does nothing.
RAGreedy::RAGreedy(RegClassFilterFunc F, LiveIntervalFilterFunc LIF)
    : MachineFunctionPass(ID),
      RegAllocBase(F, LIF) {
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineBlockFrequencyInfo>();
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/RegAllocGreedy.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
bool ReverseLocalAssignment = false;

public:
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses,
const LiveIntervalFilterFunc LIF = allocateAllLiveIntervals);

/// Return the pass name.
StringRef getPassName() const override { return "Greedy Register Allocator"; }
Expand Down
171 changes: 170 additions & 1 deletion llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "AIE2PTargetMachine.h"
#include "AIE2PTargetTransformInfo.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"

using namespace llvm;
Expand Down Expand Up @@ -66,6 +67,173 @@ void AIE2PPassConfig::addPreRegBankSelect() {
}
}

static bool onlyAllocateLIwith3DInstruction(MachineRegisterInfo &MRI,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: onlyAllocateLIWith3DInstruction

Copy link
Collaborator

@andcarminati andcarminati Apr 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have access to TII, so we could move this switch into that class behind an AIE-specific hook. We could also use a nicer name, something like isHighPriorityLIUseInstruction (not sure about this name).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the idea of exposing TII was to move the switch under an AIE-specific hook.
I chose to delay that until I see some results.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it could work with just target instructions, because we already have the 3D filter:

/// Live-interval filter: accept \p LI when at least one non-debug instruction
/// using its register has an opcode greater than
/// TargetOpcode::GENERIC_OP_END. \p TII is not consulted.
static bool onlyAllocateTargetInstructions(MachineRegisterInfo &MRI,
                                            const TargetInstrInfo &TII,
                                            const LiveInterval *LI) {
  for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(LI->reg())) {
    // Stop at the first use whose opcode lies past the generic range.
    if (UseMI.getOpcode() > TargetOpcode::GENERIC_OP_END)
      return true;
  }
  return false;
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not exactly sure of your approach

What I wanted is this: after the COPYs are generated by the live range splitting logic (which will introduce more 3D regs), the function onlyAllocateLIwith3DInstruction(...) focuses only on instructions that require the construction of a 3D reg. By "construction" I mean that, to create a 3D reg, the sub-registers used matter: "m1 dn1 dj1 dc1 dn5 dj5 dc5".

the function you have suggested would do the same as createGreedyRegisterAllocator(onlyAllocate3DRegisters)

Ahh I think by 3D filter you mean "3D" reg filter, and what I wanted was combination of 3D reg filter and 3D use instructions.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already filter by register class, so if we consider only MI.getOpcode() > TargetOpcode::GENERIC_OP_END, we will exclude all copies and other generic instructions.

BTW, with the recent bump, they replaced filter-by-class with filter-by-reg, so there is no need to change core regalloc anymore, which is a nice change ;-)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh I see what you mean, that will be nice.

const TargetInstrInfo &TII,
const LiveInterval *LI) {
const Register Reg = LI->reg();
return std::any_of(
MRI.use_nodbg_instructions(Reg).begin(),
MRI.use_nodbg_instructions(Reg).end(), [&](const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AIE2P::LDA_3D_dms_lda:
case AIE2P::LDA_3D_dmv_lda_q:
case AIE2P::LDA_3D_s16:
case AIE2P::LDA_3D_s8:
case AIE2P::LDA_3D_u16:
case AIE2P::LDA_3D_u8:
case AIE2P::LDA_TM_3D:
case AIE2P::ST_3D_dms_sts:
case AIE2P::ST_3D_dmv_sts_q:
case AIE2P::ST_3D_s16:
case AIE2P::ST_3D_s8:
case AIE2P::ST_TM_3D:
case AIE2P::VLDA_3D_128:
case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf:
case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf:
case AIE2P::VLDA_3D_dmw_lda_w:
case AIE2P::VLDA_3D_dmx_lda_bm:
case AIE2P::VLDA_3D_dmx_lda_fifohl:
case AIE2P::VLDA_3D_dmx_lda_x:
case AIE2P::VLDB_3D_128:
case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0:
case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1:
case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0:
case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1:
case AIE2P::VLDB_3D_dmw_ldb:
case AIE2P::VLDB_3D_dmx_ldb_x:
case AIE2P::VST_3D_128:
case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf:
case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf:
case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0:
case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1:
case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0:
case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1:
case AIE2P::VST_3D_dmw_sts_w:
case AIE2P::VST_3D_dmx_sts_bm:
case AIE2P::VST_3D_dmx_sts_fifohl:
case AIE2P::VST_3D_dmx_sts_x:
case AIE2P::VLD_3D_w_pseudo:
case AIE2P::VLD_3D_x_pseudo:
case AIE2P::VLD_3D_128_pseudo:
case AIE2P::PADDA_3D:
case AIE2P::PADDB_3D:
case AIE2P::PADDS_3D:
case AIE2P::PADD_3D_pseudo:
case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1:
case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0:
case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1:
case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0:
case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1:
case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0:
case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1:
case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0:
case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1:
case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0:
case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1:
case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0:
case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1:
case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0:
case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1:
case AIE2P::VST_FLUSH_512_3D:
case AIE2P::VST_FLUSH_512_CONV_3D:
case AIE2P::VLDA_POP_512_3D:
case AIE2P::VLDA_POP_544_3D:
case AIE2P::VLDA_POP_576_3D:
case AIE2P::VLDA_POP_640_3D:
case AIE2P::VLDA_POP_704_3D:
case AIE2P::VLDB_POP_512_3D:
case AIE2P::VLDB_POP_544_3D:
case AIE2P::VLDB_POP_576_3D:
case AIE2P::VLDB_POP_640_3D:
case AIE2P::VLDB_POP_704_3D:
case AIE2P::VLD_POP_512_3D_pseudo:
case AIE2P::VLD_POP_544_3D_pseudo:
case AIE2P::VLD_POP_576_3D_pseudo:
case AIE2P::VLD_POP_640_3D_pseudo:
case AIE2P::VLD_POP_704_3D_pseudo:
case AIE2P::LDA_3D_dms_lda_split:
case AIE2P::LDA_3D_dmv_lda_q_split:
case AIE2P::LDA_3D_s16_split:
case AIE2P::LDA_3D_s8_split:
case AIE2P::LDA_3D_u16_split:
case AIE2P::LDA_3D_u8_split:
case AIE2P::LDA_TM_3D_split:
case AIE2P::ST_3D_dms_sts_split:
case AIE2P::ST_3D_dmv_sts_q_split:
case AIE2P::ST_3D_s16_split:
case AIE2P::ST_3D_s8_split:
case AIE2P::ST_TM_3D_split:
case AIE2P::VLDA_3D_128_split:
case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf_split:
case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf_split:
case AIE2P::VLDA_3D_dmw_lda_w_split:
case AIE2P::VLDA_3D_dmx_lda_bm_split:
case AIE2P::VLDA_3D_dmx_lda_fifohl_split:
case AIE2P::VLDA_3D_dmx_lda_x_split:
case AIE2P::VLDB_3D_128_split:
case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0_split:
case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1_split:
case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0_split:
case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1_split:
case AIE2P::VLDB_3D_dmw_ldb_split:
case AIE2P::VLDB_3D_dmx_ldb_x_split:
case AIE2P::VST_3D_128_split:
case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf_split:
case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf_split:
case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0_split:
case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1_split:
case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0_split:
case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1_split:
case AIE2P::VST_3D_dmw_sts_w_split:
case AIE2P::VST_3D_dmx_sts_bm_split:
case AIE2P::VST_3D_dmx_sts_fifohl_split:
case AIE2P::VST_3D_dmx_sts_x_split:
case AIE2P::VLD_3D_w_pseudo_split:
case AIE2P::VLD_3D_x_pseudo_split:
case AIE2P::VLD_3D_128_pseudo_split:
case AIE2P::PADDA_3D_split:
case AIE2P::PADDB_3D_split:
case AIE2P::PADDS_3D_split:
case AIE2P::PADD_3D_pseudo_split:
case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1_split:
case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0_split:
case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1_split:
case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0_split:
case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1_split:
case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split:
case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1_split:
case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0_split:
case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1_split:
case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0_split:
case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1_split:
case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0_split:
case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1_split:
case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0_split:
case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1_split:
case AIE2P::VST_FLUSH_512_3D_split:
case AIE2P::VST_FLUSH_512_CONV_3D_split:
case AIE2P::VLDA_POP_512_3D_split:
case AIE2P::VLDA_POP_544_3D_split:
case AIE2P::VLDA_POP_576_3D_split:
case AIE2P::VLDA_POP_640_3D_split:
case AIE2P::VLDA_POP_704_3D_split:
case AIE2P::VLDB_POP_512_3D_split:
case AIE2P::VLDB_POP_544_3D_split:
case AIE2P::VLDB_POP_576_3D_split:
case AIE2P::VLDB_POP_640_3D_split:
case AIE2P::VLDB_POP_704_3D_split:
case AIE2P::VLD_POP_512_3D_pseudo_split:
case AIE2P::VLD_POP_544_3D_pseudo_split:
case AIE2P::VLD_POP_576_3D_pseudo_split:
case AIE2P::VLD_POP_640_3D_pseudo_split:
case AIE2P::VLD_POP_704_3D_pseudo_split:
return true;
default:
return false;
}
});
}

static bool onlyAllocate3DRegisters(const TargetRegisterInfo &TRI,
const TargetRegisterClass &RC) {
return AIE2P::eDSRegClass.hasSubClassEq(&RC);
Expand Down Expand Up @@ -97,7 +265,8 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() {
if (AllocateMRegsFirst)
addPass(createGreedyRegisterAllocator(onlyAllocateMRegisters));
if (EnableStagedRA) {
addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters));
addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters,
onlyAllocateLIwith3DInstruction));
addPass(createAIESuperRegRewriter());
addPass(createGreedyRegisterAllocator(onlyAllocate3D2DRegisters));
addPass(createAIESuperRegRewriter());
Expand Down