Skip to content

Commit b2937da

Browse files
[AIEX] Add a Pass to expand unallocated 2D/3D into individual ones
If we don't need a full register, we can expand to individual lanes. Co-Authored-By: Krishnam Tibrewala <[email protected]>
1 parent 24fbcd9 commit b2937da

12 files changed

+269
-59
lines changed

llvm/lib/Target/AIE/AIE.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ MachineFunctionPass *createAIEEliminateDuplicatePHI();
6060
FunctionPass *createAIEOutlineMemoryGEP();
6161
FunctionPass *createAIESuperRegRewriter();
6262
FunctionPass *createAIEWawRegRewriter();
63+
FunctionPass *createAIEUnallocatedSuperRegRewriter();
6364
FunctionPass *createAIEPostSelectOptimize();
6465
MachineFunctionPass *
6566
createDeadMachineInstructionElim(bool KeepLifetimeInstructions);
@@ -84,6 +85,8 @@ extern char &AIESuperRegRewriterID;
8485
void initializeAIESuperRegRewriterPass(PassRegistry &);
8586
extern char &AIEWawRegRewriterID;
8687
void initializeAIEWawRegRewriterPass(PassRegistry &);
88+
extern char &AIEUnallocatedSuperRegRewriterID;
89+
void initializeAIEUnallocatedSuperRegRewriterPass(PassRegistry &);
8790
extern char &AIEOutlineMemoryGEPID;
8891
void initializeAIEOutlineMemoryGEPPass(PassRegistry &);
8992

llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAIETarget() {
158158
initializeAIEPseudoBranchExpansionPass(*PR);
159159
initializeAIESubRegConstrainerPass(*PR);
160160
initializeAIESuperRegRewriterPass(*PR);
161+
initializeAIEUnallocatedSuperRegRewriterPass(*PR);
161162
initializeAIEWawRegRewriterPass(*PR);
162163
initializeAIEOutlineMemoryGEPPass(*PR);
163164
initializeAIEFinalizeBundlePass(*PR);

llvm/lib/Target/AIE/AIESuperRegUtils.cpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -184,21 +184,26 @@ LaneBitmask getLiveLanesAt(SlotIndex Index, Register Reg,
184184
return LiveLanes;
185185
}
186186

187-
void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
187+
void rewriteSuperReg(Register Reg, std::optional<Register> AssignedPhysReg,
188188
SmallSet<int, 8> &SubRegs, MachineRegisterInfo &MRI,
189189
const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM,
190190
LiveRegMatrix &LRM, LiveIntervals &LIS,
191191
SlotIndexes &Indexes, LiveDebugVariables &DebugVars) {
192192
LLVM_DEBUG(dbgs() << "Rewriting " << printReg(Reg, &TRI, 0, &MRI) << '\n');
193-
auto *TII = static_cast<const AIEBaseInstrInfo *>(
194-
VRM.getMachineFunction().getSubtarget().getInstrInfo());
193+
MachineFunction &MF = VRM.getMachineFunction();
194+
auto *TII =
195+
static_cast<const AIEBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
195196

196197
// Collect all the subreg indices to rewrite as independent vregs.
197198
SmallMapVector<int, Register, 8> SubRegToVReg;
198199
const TargetRegisterClass *SuperRC = MRI.getRegClass(Reg);
199200
assert(!SubRegs.empty());
200201
for (int SubReg : SubRegs) {
201-
const TargetRegisterClass *SubRC = TRI.getSubRegisterClass(SuperRC, SubReg);
202+
const TargetRegisterClass *SubRC =
203+
AssignedPhysReg.has_value()
204+
? TRI.getSubRegisterClass(SuperRC, SubReg)
205+
: TRI.getLargestLegalSuperClass(
206+
TRI.getSubRegisterClass(SuperRC, SubReg), MF);
202207
SubRegToVReg[SubReg] = MRI.createVirtualRegister(SubRC);
203208
}
204209

@@ -246,7 +251,6 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
246251
LIS.removeInterval(Reg);
247252

248253
for (auto &[SubRegIdx, VReg] : SubRegToVReg) {
249-
MCRegister SubPhysReg = TRI.getSubReg(AssignedPhysReg, SubRegIdx);
250254
LiveInterval &SubRegLI = LIS.getInterval(VReg);
251255
LLVM_DEBUG(dbgs() << " Assigning Range: " << SubRegLI << '\n');
252256

@@ -257,10 +261,13 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
257261
LIComponents.push_back(&SubRegLI);
258262
VRM.grow();
259263

260-
for (LiveInterval *LI : LIComponents) {
261-
LRM.assign(*LI, SubPhysReg);
262-
VRM.setRequiredPhys(LI->reg(), SubPhysReg);
263-
LLVM_DEBUG(dbgs() << " Assigned " << printReg(LI->reg()) << "\n");
264+
if (AssignedPhysReg.has_value()) {
265+
MCRegister SubPhysReg = TRI.getSubReg(*AssignedPhysReg, SubRegIdx);
266+
for (LiveInterval *LI : LIComponents) {
267+
LRM.assign(*LI, SubPhysReg);
268+
VRM.setRequiredPhys(LI->reg(), SubPhysReg);
269+
LLVM_DEBUG(dbgs() << " Assigned " << printReg(LI->reg()) << "\n");
270+
}
264271
}
265272
}
266273

llvm/lib/Target/AIE/AIESuperRegUtils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_LIB_TARGET_AIE_AIESUPERREGUTILS_H
1616

1717
#include "llvm/ADT/SmallSet.h"
18+
#include <optional>
1819

1920
namespace llvm {
2021
class Register;
@@ -63,7 +64,7 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
6364
LaneBitmask getLiveLanesAt(SlotIndex Index, Register Reg,
6465
const LiveIntervals &LIS);
6566

66-
void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
67+
void rewriteSuperReg(Register Reg, std::optional<Register> AssignedPhysReg,
6768
SmallSet<int, 8> &SubRegs, MachineRegisterInfo &MRI,
6869
const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM,
6970
LiveRegMatrix &LRM, LiveIntervals &LIS,
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
//===-- AIEUnallocatedSuperRegRewriter.cpp - Split unallocated super-regs -===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include "AIEBaseInstrInfo.h"
12+
#include "AIEBaseRegisterInfo.h"
13+
#include "AIESuperRegUtils.h"
14+
15+
#include "llvm/ADT/MapVector.h"
16+
#include "llvm/ADT/SmallSet.h"
17+
#include "llvm/CodeGen/LiveDebugVariables.h"
18+
#include "llvm/CodeGen/LiveIntervals.h"
19+
#include "llvm/CodeGen/LiveRegMatrix.h"
20+
#include "llvm/CodeGen/LiveStacks.h"
21+
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
22+
#include "llvm/CodeGen/MachineFunction.h"
23+
#include "llvm/CodeGen/MachineFunctionPass.h"
24+
#include "llvm/CodeGen/MachineInstr.h"
25+
#include "llvm/CodeGen/MachineInstrBuilder.h"
26+
#include "llvm/CodeGen/MachineOperand.h"
27+
#include "llvm/CodeGen/MachineRegisterInfo.h"
28+
#include "llvm/CodeGen/Passes.h"
29+
#include "llvm/CodeGen/SlotIndexes.h"
30+
#include "llvm/CodeGen/TargetInstrInfo.h"
31+
#include "llvm/CodeGen/TargetSubtargetInfo.h"
32+
#include "llvm/CodeGen/VirtRegMap.h"
33+
#include "llvm/Support/Debug.h"
34+
#include "llvm/Support/raw_ostream.h"
35+
36+
using namespace llvm;
37+
38+
#define DEBUG_TYPE "aie-ra-prepare"
39+
40+
namespace {
41+
42+
/// Work list of the pass: one entry per virtual register selected for
/// splitting, paired with the set of sub-register indices to rewrite it into.
using RegRewriteInfo = std::vector<std::pair<Register, SmallSet<int, 8>>>;
43+
44+
/// Split large unallocated compound registers into multiple new smaller vregs
45+
/// Than can be allocated to scalar registers.
46+
class AIEUnallocatedSuperRegRewriter : public MachineFunctionPass {
47+
48+
public:
49+
static char ID;
50+
AIEUnallocatedSuperRegRewriter() : MachineFunctionPass(ID) {}
51+
52+
void getAnalysisUsage(AnalysisUsage &AU) const override {
53+
AU.setPreservesCFG();
54+
AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
55+
AU.addRequired<VirtRegMapWrapperLegacy>();
56+
AU.addPreserved<VirtRegMapWrapperLegacy>();
57+
AU.addRequired<SlotIndexesWrapperPass>();
58+
AU.addPreserved<SlotIndexesWrapperPass>();
59+
AU.addRequired<LiveDebugVariablesWrapperLegacy>();
60+
AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
61+
AU.addRequired<LiveStacksWrapperLegacy>();
62+
AU.addPreserved<LiveStacksWrapperLegacy>();
63+
AU.addRequired<LiveIntervalsWrapperPass>();
64+
AU.addPreserved<LiveIntervalsWrapperPass>();
65+
AU.addRequired<LiveRegMatrixWrapperLegacy>();
66+
AU.addPreserved<LiveRegMatrixWrapperLegacy>();
67+
MachineFunctionPass::getAnalysisUsage(AU);
68+
}
69+
70+
bool runOnMachineFunction(MachineFunction &Fn) override;
71+
};
72+
73+
/// Identify unallocated virtual registers that can be split into subregisters.
74+
/// Returns a list of candidate registers with their rewritable subregister
75+
/// indices, excluding unused registers and those already assigned to physical
76+
/// registers.
77+
static RegRewriteInfo getRewriteCandidates(MachineRegisterInfo &MRI,
78+
const AIEBaseRegisterInfo &TRI,
79+
VirtRegMap &VRM) {
80+
RegRewriteInfo RegistersToRewrite;
81+
for (unsigned VRegIdx = 0, End = MRI.getNumVirtRegs(); VRegIdx != End;
82+
++VRegIdx) {
83+
const Register Reg = Register::index2VirtReg(VRegIdx);
84+
85+
// Ignore un-used registers
86+
if (MRI.reg_nodbg_empty(Reg) || VRM.hasPhys(Reg))
87+
continue;
88+
89+
SmallSet<int, 8> RewritableSubRegs =
90+
AIESuperRegUtils::getRewritableSubRegs(Reg, MRI, TRI);
91+
92+
if (RewritableSubRegs.empty())
93+
continue;
94+
95+
LLVM_DEBUG(dbgs() << "Candidate " << printReg(Reg, &TRI, 0, &MRI) << ":"
96+
<< printRegClassOrBank(Reg, MRI, &TRI) << '\n');
97+
98+
RegistersToRewrite.push_back({Reg, RewritableSubRegs});
99+
}
100+
101+
LLVM_DEBUG(dbgs() << "Found " << RegistersToRewrite.size()
102+
<< " candidate register(s) for rewriting\n");
103+
104+
return RegistersToRewrite;
105+
}
106+
107+
/// Split candidate registers into independent virtual registers for each
108+
/// subregister. Each composite register is rewritten using its subregister
109+
/// indices, with live intervals and debug information updated accordingly.
110+
void rewriteCandidates(RegRewriteInfo &RegistersToRewrite,
111+
MachineRegisterInfo &MRI, const AIEBaseRegisterInfo &TRI,
112+
VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS,
113+
SlotIndexes &Indexes, LiveDebugVariables &DebugVars) {
114+
115+
LLVM_DEBUG(dbgs() << "Rewriting " << RegistersToRewrite.size()
116+
<< " candidate register(s)\n");
117+
118+
for (auto [VReg, SubRegs] : RegistersToRewrite) {
119+
LLVM_DEBUG(dbgs() << " Rewriting " << printReg(VReg, &TRI, 0, &MRI)
120+
<< " into " << SubRegs.size() << " subregister(s)\n");
121+
AIESuperRegUtils::rewriteSuperReg(
122+
VReg, /*std::optional<Register> AssignedPhysReg = */ {}, SubRegs, MRI,
123+
TRI, VRM, LRM, LIS, Indexes, DebugVars);
124+
}
125+
}
126+
127+
/// Split the unallocated super-registers of \p MF into independent virtual
/// registers.
///
/// Fetches the required analyses, gathers the candidates with
/// getRewriteCandidates() and rewrites them with rewriteCandidates().
///
/// \returns false when no candidate was found (the function is left
/// untouched), true otherwise.
bool AIEUnallocatedSuperRegRewriter::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "*** Splitting unallocated super-registers: "
                    << MF.getName() << " ***\n");

  MachineRegisterInfo &MRI = MF.getRegInfo();
  VirtRegMap &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
  LiveRegMatrix &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  SlotIndexes &Indexes = getAnalysis<SlotIndexesWrapperPass>().getSI();
  LiveDebugVariables &DebugVars =
      getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();
  auto &TRI =
      *static_cast<const AIEBaseRegisterInfo *>(MRI.getTargetRegisterInfo());

  LLVM_DEBUG(dbgs() << "Identifying rewrite candidates...\n");
  RegRewriteInfo RegistersToRewrite = getRewriteCandidates(MRI, TRI, VRM);

  if (RegistersToRewrite.empty()) {
    LLVM_DEBUG(dbgs() << "No candidates found, skipping rewrite\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "Performing register rewrites...\n");
  rewriteCandidates(RegistersToRewrite, MRI, TRI, VRM, LRM, LIS, Indexes,
                    DebugVars);

  LLVM_DEBUG(dbgs() << "Successfully rewrote " << RegistersToRewrite.size()
                    << " register(s)\n");

  // The empty case returned early above, so at least one register was
  // rewritten and the function has changed.
  return true;
}
158+
159+
} // end anonymous namespace
160+
161+
// Definition of the pass ID that the constructor hands to the
// MachineFunctionPass base class.
char AIEUnallocatedSuperRegRewriter::ID = 0;
162+
// Reference to the pass ID exported from namespace llvm, so code outside this
// file can name the pass even though the class lives in an anonymous
// namespace.
char &llvm::AIEUnallocatedSuperRegRewriterID =
163+
AIEUnallocatedSuperRegRewriter::ID;
164+
165+
// Register the pass with its command-line argument and its human-readable
// name; the trailing `false, false` are the macro's cfg-only and is-analysis
// flags.
INITIALIZE_PASS(AIEUnallocatedSuperRegRewriter,
166+
"aie-unallocated-superreg-rewrite",
167+
"AIE unallocated super-reg rewrite", false, false)
168+
169+
/// Factory for the unallocated super-register rewriter pass. Returns a newly
/// heap-allocated pass instance through a FunctionPass pointer.
llvm::FunctionPass *llvm::createAIEUnallocatedSuperRegRewriter() {
170+
return new AIEUnallocatedSuperRegRewriter();
171+
}

llvm/lib/Target/AIE/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ add_llvm_target(AIECodeGen
141141
AIE2TargetMachine.cpp
142142
AIE2TargetTransformInfo.cpp
143143
AIETiedRegOperands.cpp
144+
AIEUnallocatedSuperRegRewriter.cpp
144145
ReservedRegsLICM.cpp
145146
AIEOutlineMemoryGEP.cpp
146147
AIEWawRegRewriter.cpp

llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() {
114114
addPass(createAIESuperRegRewriter());
115115
addPass(createGreedyRegisterAllocator(onlyAllocate3D2DRegisters));
116116
addPass(createAIESuperRegRewriter());
117+
if (EnableFineGrainedStagedRA)
118+
addPass(createAIEUnallocatedSuperRegRewriter());
117119
}
118120
addPass(createGreedyRegisterAllocator());
119121
if (EnableWAWRegRewrite) {

llvm/test/CodeGen/AIE/aie2p/llc-pipeline-aie2p.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@
247247
; AIE-O1-NEXT: AIE super-reg rewrite
248248
; AIE-O1-NEXT: Greedy Register Allocator
249249
; AIE-O1-NEXT: AIE super-reg rewrite
250+
; AIE-O1-NEXT: AIE unallocated super-reg rewrite
250251
; AIE-O1-NEXT: Greedy Register Allocator
251252
; AIE-O1-NEXT: AIE waw-reg rewrite
252253
; AIE-O1-NEXT: Greedy Register Allocator
@@ -472,6 +473,7 @@
472473
; AIE-O23-NEXT: AIE super-reg rewrite
473474
; AIE-O23-NEXT: Greedy Register Allocator
474475
; AIE-O23-NEXT: AIE super-reg rewrite
476+
; AIE-O23-NEXT: AIE unallocated super-reg rewrite
475477
; AIE-O23-NEXT: Greedy Register Allocator
476478
; AIE-O23-NEXT: AIE waw-reg rewrite
477479
; AIE-O23-NEXT: Greedy Register Allocator

0 commit comments

Comments
 (0)