diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 67bad5884c260..de3253e64b978 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -460,7 +460,7 @@ extern char &GCNPreRAOptimizationsID; FunctionPass *createAMDGPUSetWavePriorityPass(); void initializeAMDGPUSetWavePriorityPass(PassRegistry &); -void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &); +void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &); extern char &GCNRewritePartialRegUsesID; void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index d9d97928062f5..14b35a4fd8327 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS( #endif MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this)) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass()) +MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) @@ -119,6 +120,5 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass()) -DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 8b387a191efba..106a16f2fcd4c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -34,6 +34,7 @@ #include "GCNIterativeScheduler.h" #include "GCNPreRALongBranchReg.h" #include "GCNPreRAOptimizations.h" +#include "GCNRewritePartialRegUses.h" #include "GCNSchedStrategy.h" #include "GCNVOPDUtils.h" #include "R600.h" @@ -550,7 +551,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeGCNNSAReassignPass(*PR); initializeGCNPreRAOptimizationsLegacyPass(*PR); initializeGCNPreRALongBranchRegLegacyPass(*PR); - initializeGCNRewritePartialRegUsesPass(*PR); + initializeGCNRewritePartialRegUsesLegacyPass(*PR); initializeGCNRegPressurePrinterPass(*PR); initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR); initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR); diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp index 077ccf36ea4fb..c58d1b00a1002 100644 --- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp @@ -28,6 +28,7 @@ /// calculation and creates more possibilities for the code unaware of lanemasks //===----------------------------------------------------------------------===// +#include "GCNRewritePartialRegUses.h" #include "AMDGPU.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIRegisterInfo.h" @@ -44,25 +45,7 @@ using namespace llvm; namespace { -class GCNRewritePartialRegUses : public MachineFunctionPass { -public: - static char ID; - GCNRewritePartialRegUses() : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { - return "Rewrite Partial Register Uses"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addPreserved(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool 
runOnMachineFunction(MachineFunction &MF) override; - -private: +class GCNRewritePartialRegUsesImpl { MachineRegisterInfo *MRI; const SIRegisterInfo *TRI; const TargetInstrInfo *TII; @@ -155,13 +138,36 @@ class GCNRewritePartialRegUses : public MachineFunctionPass { /// Cache for getAllocatableAndAlignedRegClassMask method: /// AlignNumBits -> Class bitmask. mutable SmallDenseMap AllocatableAndAlignedRegClassMasks; + +public: + GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {} + bool run(MachineFunction &MF); +}; + +class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass { +public: + static char ID; + GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Rewrite Partial Register Uses"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace // TODO: move this to the tablegen and use binary search by Offset. 
-unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset, - unsigned Size) const { +unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset, + unsigned Size) const { const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0); if (Inserted) { for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) { @@ -175,15 +181,14 @@ unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset, return I->second; } -unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg, - unsigned RShift) const { +unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg, + unsigned RShift) const { unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift; return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg)); } -const uint32_t * -GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC, - unsigned SubRegIdx) const { +const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask( + const TargetRegisterClass *RC, unsigned SubRegIdx) const { const auto [I, Inserted] = SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr); if (Inserted) { @@ -197,7 +202,8 @@ GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC, return I->second; } -const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask( +const BitVector & +GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask( unsigned AlignNumBits) const { const auto [I, Inserted] = AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits); @@ -214,7 +220,7 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask( } const TargetRegisterClass * -GCNRewritePartialRegUses::getRegClassWithShiftedSubregs( +GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs( const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits, unsigned CoverSubregIdx, SubRegMap &SubRegs) const { @@ -289,8 +295,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs( } const TargetRegisterClass * 
-GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC, - SubRegMap &SubRegs) const { +GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC, + SubRegMap &SubRegs) const { unsigned CoverSubreg = AMDGPU::NoSubRegister; unsigned Offset = std::numeric_limits::max(); unsigned End = 0; @@ -343,9 +349,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC, // Only the subrange's lanemasks of the original interval need to be modified. // Subrange for a covering subreg becomes the main range. -void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg, - Register NewReg, - SubRegMap &SubRegs) const { +void GCNRewritePartialRegUsesImpl::updateLiveIntervals( + Register OldReg, Register NewReg, SubRegMap &SubRegs) const { if (!LIS->hasInterval(OldReg)) return; @@ -400,13 +405,13 @@ void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg, } const TargetRegisterClass * -GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const { +GCNRewritePartialRegUsesImpl::getOperandRegClass(MachineOperand &MO) const { MachineInstr *MI = MO.getParent(); return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI, *MI->getParent()->getParent()); } -bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const { +bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const { auto Range = MRI->reg_nodbg_operands(Reg); if (Range.empty() || any_of(Range, [](MachineOperand &MO) { return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1] @@ -476,12 +481,10 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const { return true; } -bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) { +bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) { MRI = &MF.getRegInfo(); TRI = static_cast(MRI->getTargetRegisterInfo()); TII = MF.getSubtarget().getInstrInfo(); - auto *LISWrapper = getAnalysisIfAvailable(); - LIS = LISWrapper ? 
&LISWrapper->getLIS() : nullptr; bool Changed = false; for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { Changed |= rewriteReg(Register::index2VirtReg(I)); @@ -489,11 +492,33 @@ bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) { return Changed; } -char GCNRewritePartialRegUses::ID; +bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) { + LiveIntervalsWrapperPass *LISWrapper = + getAnalysisIfAvailable(); + LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; + GCNRewritePartialRegUsesImpl Impl(LIS); + return Impl.run(MF); +} + +PreservedAnalyses +GCNRewritePartialRegUsesPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto *LIS = MFAM.getCachedResult(MF); + if (!GCNRewritePartialRegUsesImpl(LIS).run(MF)) + return PreservedAnalyses::all(); + + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet(); + PA.preserve(); + PA.preserve(); + return PA; +} + +char GCNRewritePartialRegUsesLegacy::ID; -char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID; +char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID; -INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE, "Rewrite Partial Register Uses", false, false) -INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE, +INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE, "Rewrite Partial Register Uses", false, false) diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h new file mode 100644 index 0000000000000..b2c3190b5c6ba --- /dev/null +++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h @@ -0,0 +1,23 @@ +//===- GCNRewritePartialRegUses.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H +#define LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { +class GCNRewritePartialRegUsesPass + : public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir index 85d0c054754d0..ede043ce73a47 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s --- | define void @test_vreg_96_w64() !dbg !5 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir index 037f39df8c3e0..79e9ce2737695 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs 
-start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s --- name: test_subregs_composition_vreg_1024 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir index 07e49dcdafd8c..33007ee8a7c38 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s --- name: test_subregs_composition_vreg_1024 tracksRegLiveness: true