Skip to content

Commit 9c725ef

Browse files
authored
[AMDGPU][NewPM] Port "GCNRewritePartialRegUses" pass to NPM (#126024)
1 parent 9e5c136 commit 9c725ef

8 files changed

+98
-44
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ extern char &GCNPreRAOptimizationsID;
460460
FunctionPass *createAMDGPUSetWavePriorityPass();
461461
void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
462462

463-
void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
463+
void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
464464
extern char &GCNRewritePartialRegUsesID;
465465

466466
void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS(
9898
#endif
9999
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
100100
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
101+
MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
101102
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
102103
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
103104
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
@@ -119,6 +120,7 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
119120
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
120121
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
121122
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
123+
// NOTE(review): "amdgpu-pre-ra-optimizations" is already registered as a real
// MACHINE_FUNCTION_PASS earlier in this file, so this added dummy entry looks
// like a duplicate -- confirm whether it should be dropped.
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
122124
// NOTE(review): "amdgpu-rewrite-partial-reg-uses" is registered as a real
// MACHINE_FUNCTION_PASS by this change, so this dummy entry looks stale --
// confirm whether it should be removed.
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
123125
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
124126

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "GCNIterativeScheduler.h"
3535
#include "GCNPreRALongBranchReg.h"
3636
#include "GCNPreRAOptimizations.h"
37+
#include "GCNRewritePartialRegUses.h"
3738
#include "GCNSchedStrategy.h"
3839
#include "GCNVOPDUtils.h"
3940
#include "R600.h"
@@ -550,7 +551,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
550551
initializeGCNNSAReassignPass(*PR);
551552
initializeGCNPreRAOptimizationsLegacyPass(*PR);
552553
initializeGCNPreRALongBranchRegLegacyPass(*PR);
553-
initializeGCNRewritePartialRegUsesPass(*PR);
554+
initializeGCNRewritePartialRegUsesLegacyPass(*PR);
554555
initializeGCNRegPressurePrinterPass(*PR);
555556
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
556557
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);

llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp

Lines changed: 67 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
/// calculation and creates more possibilities for the code unaware of lanemasks
2929
//===----------------------------------------------------------------------===//
3030

31+
#include "GCNRewritePartialRegUses.h"
3132
#include "AMDGPU.h"
3233
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
3334
#include "SIRegisterInfo.h"
@@ -44,25 +45,7 @@ using namespace llvm;
4445

4546
namespace {
4647

47-
class GCNRewritePartialRegUses : public MachineFunctionPass {
48-
public:
49-
static char ID;
50-
GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
51-
52-
StringRef getPassName() const override {
53-
return "Rewrite Partial Register Uses";
54-
}
55-
56-
void getAnalysisUsage(AnalysisUsage &AU) const override {
57-
AU.setPreservesCFG();
58-
AU.addPreserved<LiveIntervalsWrapperPass>();
59-
AU.addPreserved<SlotIndexesWrapperPass>();
60-
MachineFunctionPass::getAnalysisUsage(AU);
61-
}
62-
63-
bool runOnMachineFunction(MachineFunction &MF) override;
64-
65-
private:
48+
class GCNRewritePartialRegUsesImpl {
6649
MachineRegisterInfo *MRI;
6750
const SIRegisterInfo *TRI;
6851
const TargetInstrInfo *TII;
@@ -155,13 +138,36 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
155138
/// Cache for getAllocatableAndAlignedRegClassMask method:
156139
/// AlignNumBits -> Class bitmask.
157140
mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
141+
142+
public:
143+
GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
144+
bool run(MachineFunction &MF);
145+
};
146+
147+
/// Legacy pass manager entry point for the partial-register-use rewrite.
/// This class only carries the pass identity, name and analysis-usage
/// declaration; runOnMachineFunction is defined out of line.
class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
148+
public:
149+
// Pass identification; its address is what the pass is registered under.
static char ID;
150+
GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}
151+
152+
StringRef getPassName() const override {
153+
return "Rewrite Partial Register Uses";
154+
}
155+
156+
// Declares the CFG, LiveIntervals and SlotIndexes as preserved, then defers
// to the MachineFunctionPass defaults. No analysis is added as required here.
void getAnalysisUsage(AnalysisUsage &AU) const override {
157+
AU.setPreservesCFG();
158+
AU.addPreserved<LiveIntervalsWrapperPass>();
159+
AU.addPreserved<SlotIndexesWrapperPass>();
160+
MachineFunctionPass::getAnalysisUsage(AU);
161+
}
162+
163+
bool runOnMachineFunction(MachineFunction &MF) override;
158164
};
159165

160166
} // end anonymous namespace
161167

162168
// TODO: move this to the tablegen and use binary search by Offset.
163-
unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
164-
unsigned Size) const {
169+
unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
170+
unsigned Size) const {
165171
const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
166172
if (Inserted) {
167173
for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
@@ -175,15 +181,14 @@ unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
175181
return I->second;
176182
}
177183

178-
unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
179-
unsigned RShift) const {
184+
/// Returns the subregister index that getSubReg finds for the same size as
/// \p SubReg at an offset lowered by \p RShift.
/// NOTE(review): the subtraction is unsigned, so this assumes \p RShift does
/// not exceed the offset of \p SubReg -- confirm at the call sites.
unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
185+
unsigned RShift) const {
180186
unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
181187
return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
182188
}
183189

184-
const uint32_t *
185-
GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
186-
unsigned SubRegIdx) const {
190+
const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
191+
const TargetRegisterClass *RC, unsigned SubRegIdx) const {
187192
const auto [I, Inserted] =
188193
SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
189194
if (Inserted) {
@@ -197,7 +202,8 @@ GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
197202
return I->second;
198203
}
199204

200-
const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
205+
const BitVector &
206+
GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
201207
unsigned AlignNumBits) const {
202208
const auto [I, Inserted] =
203209
AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
@@ -214,7 +220,7 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
214220
}
215221

216222
const TargetRegisterClass *
217-
GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
223+
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
218224
const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
219225
unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
220226

@@ -289,8 +295,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
289295
}
290296

291297
const TargetRegisterClass *
292-
GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
293-
SubRegMap &SubRegs) const {
298+
GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
299+
SubRegMap &SubRegs) const {
294300
unsigned CoverSubreg = AMDGPU::NoSubRegister;
295301
unsigned Offset = std::numeric_limits<unsigned>::max();
296302
unsigned End = 0;
@@ -343,9 +349,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
343349

344350
// Only the subrange's lanemasks of the original interval need to be modified.
345351
// Subrange for a covering subreg becomes the main range.
346-
void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
347-
Register NewReg,
348-
SubRegMap &SubRegs) const {
352+
void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
353+
Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
349354
if (!LIS->hasInterval(OldReg))
350355
return;
351356

@@ -400,13 +405,13 @@ void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
400405
}
401406

402407
/// Returns the register class that TII reports for the operand slot \p MO
/// occupies in its parent instruction, based on that instruction's opcode
/// description and the operand's index within it.
const TargetRegisterClass *
403-
GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
408+
GCNRewritePartialRegUsesImpl::getOperandRegClass(MachineOperand &MO) const {
404409
MachineInstr *MI = MO.getParent();
405410
return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
406411
*MI->getParent()->getParent());
407412
}
408413

409-
bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
414+
bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
410415
auto Range = MRI->reg_nodbg_operands(Reg);
411416
if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
412417
return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
@@ -476,24 +481,44 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
476481
return true;
477482
}
478483

479-
bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
484+
/// Caches the register info, target register info and instruction info of
/// \p MF in the member pointers, then calls rewriteReg for every virtual
/// register index of the function.
/// \returns true if at least one rewriteReg call reported a change.
bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
480485
MRI = &MF.getRegInfo();
481486
TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
482487
TII = MF.getSubtarget().getInstrInfo();
483-
auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
484-
LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
485488
bool Changed = false;
486489
// The virtual register count is read once, before the loop starts.
for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
487490
Changed |= rewriteReg(Register::index2VirtReg(I));
488491
}
489492
return Changed;
490493
}
491494

492-
char GCNRewritePartialRegUses::ID;
495+
/// Legacy pass manager driver: fetches LiveIntervals if it is available,
/// builds a GCNRewritePartialRegUsesImpl around it and returns the result of
/// running that implementation on \p MF.
bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
496+
LiveIntervalsWrapperPass *LISWrapper =
497+
getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
498+
// LiveIntervals is optional here: a null pointer is passed on when the
// wrapper pass is not available.
LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
499+
GCNRewritePartialRegUsesImpl Impl(LIS);
500+
return Impl.run(MF);
501+
}
502+
503+
/// New pass manager driver: runs GCNRewritePartialRegUsesImpl on \p MF using
/// whatever LiveIntervals result getCachedResult returns from \p MFAM.
/// \returns PreservedAnalyses::all() when the implementation reports no
/// change; otherwise the machine-function-pass default set with the CFG
/// analyses, LiveIntervals and SlotIndexes additionally marked preserved.
PreservedAnalyses
504+
GCNRewritePartialRegUsesPass::run(MachineFunction &MF,
505+
MachineFunctionAnalysisManager &MFAM) {
506+
// Only a cached result is queried here; the pointer is handed to the
// implementation as-is.
auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
507+
if (!GCNRewritePartialRegUsesImpl(LIS).run(MF))
508+
return PreservedAnalyses::all();
509+
510+
// Same preserved set as the legacy pass's getAnalysisUsage declares.
auto PA = getMachineFunctionPassPreservedAnalyses();
511+
PA.preserveSet<CFGAnalyses>();
512+
PA.preserve<LiveIntervalsAnalysis>();
513+
PA.preserve<SlotIndexesAnalysis>();
514+
return PA;
515+
}
516+
517+
char GCNRewritePartialRegUsesLegacy::ID;
493518

494-
char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
519+
char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID;
495520

496-
INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
521+
INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
497522
"Rewrite Partial Register Uses", false, false)
498-
INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
523+
INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
499524
"Rewrite Partial Register Uses", false, false)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===- GCNRewritePartialRegUses.h -------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
10+
#define LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
11+
12+
#include "llvm/CodeGen/MachinePassManager.h"
13+
14+
namespace llvm {
15+
/// New pass manager pass class for the partial-register-use rewrite. It only
/// declares run(); the definition is not part of this header.
class GCNRewritePartialRegUsesPass
16+
: public PassInfoMixin<GCNRewritePartialRegUsesPass> {
17+
public:
18+
/// Runs the pass on \p MF with access to the analysis manager \p MFAM and
/// returns the set of analyses that remain valid afterwards.
PreservedAnalyses run(MachineFunction &MF,
19+
MachineFunctionAnalysisManager &MFAM);
20+
};
21+
} // namespace llvm
22+
23+
#endif // LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H

llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
22
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
34
--- |
45
define void @test_vreg_96_w64() !dbg !5 {
56
entry:

llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
34
---
45
name: test_subregs_composition_vreg_1024
56
tracksRegLiveness: true

llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
34
---
45
name: test_subregs_composition_vreg_1024
56
tracksRegLiveness: true

0 commit comments

Comments
 (0)