Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ extern char &GCNPreRAOptimizationsID;
FunctionPass *createAMDGPUSetWavePriorityPass();
void initializeAMDGPUSetWavePriorityPass(PassRegistry &);

void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
extern char &GCNRewritePartialRegUsesID;

void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS(
#endif
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
Expand All @@ -119,6 +120,7 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "GCNIterativeScheduler.h"
#include "GCNPreRALongBranchReg.h"
#include "GCNPreRAOptimizations.h"
#include "GCNRewritePartialRegUses.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be unused

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Required by the includes of AMDGPUPassRegistry.def

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yes

#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
Expand Down Expand Up @@ -550,7 +551,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNNSAReassignPass(*PR);
initializeGCNPreRAOptimizationsLegacyPass(*PR);
initializeGCNPreRALongBranchRegLegacyPass(*PR);
initializeGCNRewritePartialRegUsesPass(*PR);
initializeGCNRewritePartialRegUsesLegacyPass(*PR);
initializeGCNRegPressurePrinterPass(*PR);
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
Expand Down
109 changes: 67 additions & 42 deletions llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
/// calculation and creates more possibilities for the code unaware of lanemasks
//===----------------------------------------------------------------------===//

#include "GCNRewritePartialRegUses.h"
#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
Expand All @@ -44,25 +45,7 @@ using namespace llvm;

namespace {

class GCNRewritePartialRegUses : public MachineFunctionPass {
public:
static char ID;
GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}

StringRef getPassName() const override {
return "Rewrite Partial Register Uses";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreserved<SlotIndexesWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}

bool runOnMachineFunction(MachineFunction &MF) override;

private:
class GCNRewritePartialRegUsesImpl {
MachineRegisterInfo *MRI;
const SIRegisterInfo *TRI;
const TargetInstrInfo *TII;
Expand Down Expand Up @@ -155,13 +138,36 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
/// Cache for getAllocatableAndAlignedRegClassMask method:
/// AlignNumBits -> Class bitmask.
mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;

public:
GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
bool run(MachineFunction &MF);
};

/// Legacy pass manager wrapper for the partial register use rewriting.
///
/// The class itself holds no rewriting state; it only provides the
/// MachineFunctionPass boilerplate (pass ID, name, analysis usage) and the
/// runOnMachineFunction entry point, which is defined out of line.
class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
public:
// Pass identification; its address is exposed as GCNRewritePartialRegUsesID.
static char ID;
GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}

/// Human-readable pass name.
StringRef getPassName() const override {
return "Rewrite Partial Register Uses";
}

/// Declares what this pass keeps valid. No analysis is added as required
/// here; the CFG, LiveIntervals and SlotIndexes are marked as preserved.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreserved<SlotIndexesWrapperPass>();
// Let the base class add its own analysis usage as well.
MachineFunctionPass::getAnalysisUsage(AU);
}

/// Entry point invoked for each machine function; returns true if \p MF
/// was modified.
bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

// TODO: move this to the tablegen and use binary search by Offset.
unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
unsigned Size) const {
unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
unsigned Size) const {
const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
if (Inserted) {
for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
Expand All @@ -175,15 +181,14 @@ unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
return I->second;
}

/// Return the subregister index that getSubReg yields for the bit size of
/// \p SubReg and the offset of \p SubReg reduced by \p RShift.
///
/// NOTE(review): the offset is computed in unsigned arithmetic, so callers
/// presumably guarantee RShift <= offset of SubReg - confirm.
unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
unsigned RShift) const {
unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
unsigned RShift) const {
// New offset after moving the subregister RShift bits towards bit 0.
unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
// The size is unchanged; only the offset is shifted.
return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}

const uint32_t *
GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
unsigned SubRegIdx) const {
const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
const TargetRegisterClass *RC, unsigned SubRegIdx) const {
const auto [I, Inserted] =
SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
if (Inserted) {
Expand All @@ -197,7 +202,8 @@ GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
return I->second;
}

const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
const BitVector &
GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
unsigned AlignNumBits) const {
const auto [I, Inserted] =
AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
Expand All @@ -214,7 +220,7 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
}

const TargetRegisterClass *
GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
unsigned CoverSubregIdx, SubRegMap &SubRegs) const {

Expand Down Expand Up @@ -289,8 +295,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
}

const TargetRegisterClass *
GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
SubRegMap &SubRegs) const {
GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
SubRegMap &SubRegs) const {
unsigned CoverSubreg = AMDGPU::NoSubRegister;
unsigned Offset = std::numeric_limits<unsigned>::max();
unsigned End = 0;
Expand Down Expand Up @@ -343,9 +349,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,

// Only the subrange's lanemasks of the original interval need to be modified.
// Subrange for a covering subreg becomes the main range.
void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
Register NewReg,
SubRegMap &SubRegs) const {
void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
if (!LIS->hasInterval(OldReg))
return;

Expand Down Expand Up @@ -400,13 +405,13 @@ void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
}

/// Return the register class that the instruction description of \p MO's
/// parent instruction reports for the operand slot occupied by \p MO, as
/// given by TargetInstrInfo::getRegClass.
const TargetRegisterClass *
GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
GCNRewritePartialRegUsesImpl::getOperandRegClass(MachineOperand &MO) const {
MachineInstr *MI = MO.getParent();
// Query by opcode descriptor and operand index; the last argument is the
// MachineFunction that contains MI.
return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
*MI->getParent()->getParent());
}

bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
auto Range = MRI->reg_nodbg_operands(Reg);
if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
Expand Down Expand Up @@ -476,24 +481,44 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
return true;
}

/// Run the rewriting over every virtual register of \p MF.
///
/// Caches the register info, target register info and instruction info of
/// \p MF in the members, then calls rewriteReg for each virtual register
/// index. Returns true if at least one call reported a change.
bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
TII = MF.getSubtarget().getInstrInfo();
auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
bool Changed = false;
// The virtual register count is read once, before the loop starts.
for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
Changed |= rewriteReg(Register::index2VirtReg(I));
}
return Changed;
}

char GCNRewritePartialRegUses::ID;
/// Legacy pass manager entry point: forwards to the shared implementation.
///
/// LiveIntervals is optional. If the wrapper pass is not available, the
/// implementation is constructed with a null pointer.
bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
  LiveIntervals *Intervals = nullptr;
  if (LiveIntervalsWrapperPass *Wrapper =
          getAnalysisIfAvailable<LiveIntervalsWrapperPass>())
    Intervals = &Wrapper->getLIS();

  return GCNRewritePartialRegUsesImpl(Intervals).run(MF);
}

/// New pass manager entry point.
///
/// Uses LiveIntervals only if a result is already cached for \p MF; the
/// analysis is never computed on demand here. When nothing was rewritten all
/// analyses are reported as preserved; otherwise the machine function pass
/// default set is extended with the CFG analyses, LiveIntervals and
/// SlotIndexes.
PreservedAnalyses
GCNRewritePartialRegUsesPass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
  auto *CachedLIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  GCNRewritePartialRegUsesImpl Rewriter(CachedLIS);

  const bool Changed = Rewriter.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved = getMachineFunctionPassPreservedAnalyses();
  Preserved.preserveSet<CFGAnalyses>();
  Preserved.preserve<LiveIntervalsAnalysis>();
  Preserved.preserve<SlotIndexesAnalysis>();
  return Preserved;
}

char GCNRewritePartialRegUsesLegacy::ID;

char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
"Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
"Rewrite Partial Register Uses", false, false)
23 changes: 23 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===- GCNRewritePartialRegUses.h -------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
#define LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {
/// New pass manager interface of the partial register use rewriting pass.
/// Registered as "amdgpu-rewrite-partial-reg-uses" in
/// AMDGPUPassRegistry.def.
class GCNRewritePartialRegUsesPass
: public PassInfoMixin<GCNRewritePartialRegUsesPass> {
public:
/// Runs the pass on \p MF and returns the set of analyses that remain
/// valid afterwards.
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
};
} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
--- |
define void @test_vreg_96_w64() !dbg !5 {
entry:
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
---
name: test_subregs_composition_vreg_1024
tracksRegLiveness: true
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
---
name: test_subregs_composition_vreg_1024
tracksRegLiveness: true
Expand Down