Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ extern char &GCNPreRAOptimizationsID;
FunctionPass *createAMDGPUSetWavePriorityPass();
void initializeAMDGPUSetWavePriorityPass(PassRegistry &);

void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
extern char &GCNRewritePartialRegUsesID;

void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS(
#endif
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
Expand All @@ -119,7 +120,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())

DUMMY_MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNPreRALongBranchReg.h"
#include "GCNRewritePartialRegUses.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be unused

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Required by the includes of AMDGPUPassRegistry.def

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yes

#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
Expand Down Expand Up @@ -550,7 +551,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNNSAReassignPass(*PR);
initializeGCNPreRAOptimizationsPass(*PR);
initializeGCNPreRALongBranchRegLegacyPass(*PR);
initializeGCNRewritePartialRegUsesPass(*PR);
initializeGCNRewritePartialRegUsesLegacyPass(*PR);
initializeGCNRegPressurePrinterPass(*PR);
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
Expand Down
109 changes: 67 additions & 42 deletions llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
/// calculation and creates more possibilities for the code unaware of lanemasks
//===----------------------------------------------------------------------===//

#include "GCNRewritePartialRegUses.h"
#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
Expand All @@ -44,25 +45,7 @@ using namespace llvm;

namespace {

class GCNRewritePartialRegUses : public MachineFunctionPass {
public:
static char ID;
GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}

StringRef getPassName() const override {
return "Rewrite Partial Register Uses";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreserved<SlotIndexesWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}

bool runOnMachineFunction(MachineFunction &MF) override;

private:
class GCNRewritePartialRegUsesImpl {
MachineRegisterInfo *MRI;
const SIRegisterInfo *TRI;
const TargetInstrInfo *TII;
Expand Down Expand Up @@ -155,13 +138,36 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
/// Cache for getAllocatableAndAlignedRegClassMask method:
/// AlignNumBits -> Class bitmask.
mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;

public:
GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
bool run(MachineFunction &MF);
};

/// Legacy pass manager wrapper for the partial-register-use rewrite.
///
/// The pass itself holds no rewrite state: runOnMachineFunction constructs a
/// GCNRewritePartialRegUsesImpl and delegates the work to it.
class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
public:
// Pass identification; its address is exported as GCNRewritePartialRegUsesID.
static char ID;
GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}

StringRef getPassName() const override {
return "Rewrite Partial Register Uses";
}

// Declares what survives this pass: the CFG, LiveIntervals and SlotIndexes.
// No analysis is added as required here; LiveIntervals is only picked up
// with getAnalysisIfAvailable in runOnMachineFunction.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreserved<SlotIndexesWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}

// Returns true if the implementation reported a change to MF.
bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

// TODO: move this to the tablegen and use binary search by Offset.
unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
unsigned Size) const {
unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
unsigned Size) const {
const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
if (Inserted) {
for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
Expand All @@ -175,15 +181,14 @@ unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
return I->second;
}

// Returns the subregister index that has the same size as SubReg but whose
// offset is SubReg's offset minus RShift, looked up through getSubReg.
// NOTE(review): the subtraction is unsigned and would wrap if RShift were
// larger than SubReg's offset - presumably callers guarantee it is not; confirm.
unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
unsigned RShift) const {
unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
unsigned RShift) const {
unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}

const uint32_t *
GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
unsigned SubRegIdx) const {
const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
const TargetRegisterClass *RC, unsigned SubRegIdx) const {
const auto [I, Inserted] =
SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
if (Inserted) {
Expand All @@ -197,7 +202,8 @@ GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
return I->second;
}

const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
const BitVector &
GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
unsigned AlignNumBits) const {
const auto [I, Inserted] =
AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
Expand All @@ -214,7 +220,7 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
}

const TargetRegisterClass *
GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
unsigned CoverSubregIdx, SubRegMap &SubRegs) const {

Expand Down Expand Up @@ -289,8 +295,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
}

const TargetRegisterClass *
GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
SubRegMap &SubRegs) const {
GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
SubRegMap &SubRegs) const {
unsigned CoverSubreg = AMDGPU::NoSubRegister;
unsigned Offset = std::numeric_limits<unsigned>::max();
unsigned End = 0;
Expand Down Expand Up @@ -343,9 +349,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,

// Only the subrange's lanemasks of the original interval need to be modified.
// Subrange for a covering subreg becomes the main range.
void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
Register NewReg,
SubRegMap &SubRegs) const {
void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
if (!LIS->hasInterval(OldReg))
return;

Expand Down Expand Up @@ -400,13 +405,13 @@ void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
}

// Returns the register class that TII->getRegClass reports for the operand
// slot occupied by MO, using the instruction description of MO's parent
// instruction and the function that contains it.
// NOTE(review): the result is presumably null when the description places no
// class constraint on the operand - confirm against TargetInstrInfo::getRegClass.
const TargetRegisterClass *
GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
GCNRewritePartialRegUsesImpl::getOperandRegClass(MachineOperand &MO) const {
MachineInstr *MI = MO.getParent();
return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
*MI->getParent()->getParent());
}

bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
auto Range = MRI->reg_nodbg_operands(Reg);
if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
Expand Down Expand Up @@ -476,24 +481,44 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
return true;
}

// Driver of the rewrite: caches the MachineRegisterInfo, SIRegisterInfo and
// TargetInstrInfo of MF in the member pointers, then calls rewriteReg on every
// virtual register index. Returns true if any rewriteReg call returned true.
bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
TII = MF.getSubtarget().getInstrInfo();
// NOTE(review): this LiveIntervals lookup looks like the pre-change form of
// the function; the Impl class receives LIS through its constructor - confirm
// which of the two applies.
auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
bool Changed = false;
// Virtual registers are addressed by dense index; convert each to a Register.
for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
Changed |= rewriteReg(Register::index2VirtReg(I));
}
return Changed;
}

char GCNRewritePartialRegUses::ID;
/// Legacy pass manager entry point.
///
/// LiveIntervals is optional for this pass: when the wrapper analysis is not
/// available the implementation is handed a null pointer instead.
/// \returns true if the implementation changed \p MF.
bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
  LiveIntervals *LiveInts = nullptr;
  if (auto *Wrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>())
    LiveInts = &Wrapper->getLIS();

  return GCNRewritePartialRegUsesImpl(LiveInts).run(MF);
}

/// New pass manager entry point.
///
/// Uses LiveIntervals only if a result is already cached for \p MF (the
/// analysis is never computed on demand here). When nothing was rewritten all
/// analyses are reported as preserved; otherwise the default machine-function
/// preserved set is extended with the CFG analyses, LiveIntervals and
/// SlotIndexes.
PreservedAnalyses
GCNRewritePartialRegUsesPass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
  LiveIntervals *CachedLIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  GCNRewritePartialRegUsesImpl Impl(CachedLIS);
  const bool Changed = Impl.run(MF);

  if (Changed) {
    PreservedAnalyses Preserved = getMachineFunctionPassPreservedAnalyses();
    Preserved.preserveSet<CFGAnalyses>();
    Preserved.preserve<LiveIntervalsAnalysis>();
    Preserved.preserve<SlotIndexesAnalysis>();
    return Preserved;
  }

  return PreservedAnalyses::all();
}

char GCNRewritePartialRegUsesLegacy::ID;

char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
"Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
"Rewrite Partial Register Uses", false, false)
23 changes: 23 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===- GCNRewritePartialRegUses.h -------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
#define LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {
/// New pass manager wrapper for the partial-register-use rewrite. Registered
/// in AMDGPUPassRegistry.def under the name "amdgpu-rewrite-partial-reg-uses".
class GCNRewritePartialRegUsesPass
: public PassInfoMixin<GCNRewritePartialRegUsesPass> {
public:
/// Runs the rewrite on \p MF, using LiveIntervals only if \p MFAM already has
/// a cached result for it. Returns PreservedAnalyses::all() when nothing
/// changed; otherwise the machine-function default set plus the CFG analyses,
/// LiveIntervals and SlotIndexes.
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
};
} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
--- |
define void @test_vreg_96_w64() !dbg !5 {
entry:
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
---
name: test_subregs_composition_vreg_1024
tracksRegLiveness: true
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
---
name: test_subregs_composition_vreg_1024
tracksRegLiveness: true
Expand Down