Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ extern char &SIFixSGPRCopiesLegacyID;
void initializeSIFixVGPRCopiesLegacyPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;

void initializeSILowerWWMCopiesPass(PassRegistry &);
extern char &SILowerWWMCopiesID;
void initializeSILowerWWMCopiesLegacyPass(PassRegistry &);
extern char &SILowerWWMCopiesLegacyID;

void initializeSILowerI1CopiesLegacyPass(PassRegistry &);
extern char &SILowerI1CopiesLegacyID;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "SILoadStoreOptimizer.h"
#include "SILowerControlFlow.h"
#include "SILowerSGPRSpills.h"
#include "SILowerWWMCopies.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIOptimizeExecMasking.h"
Expand Down Expand Up @@ -483,7 +484,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeAMDGPURegBankSelectPass(*PR);
initializeAMDGPURegBankLegalizePass(*PR);
initializeSILowerWWMCopiesPass(*PR);
initializeSILowerWWMCopiesLegacyPass(*PR);
initializeAMDGPUMarkLastScratchLoadPass(*PR);
initializeSILowerSGPRSpillsLegacyPass(*PR);
initializeSIFixSGPRCopiesLegacyPass(*PR);
Expand Down Expand Up @@ -1582,7 +1583,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
// For allocating other wwm register operands.
addPass(createWWMRegAllocPass(false));

addPass(&SILowerWWMCopiesID);
addPass(&SILowerWWMCopiesLegacyID);
addPass(&AMDGPUReserveWWMRegsID);

// For allocating per-thread VGPRs.
Expand Down Expand Up @@ -1618,7 +1619,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {

// For allocating other whole wave mode registers.
addPass(createWWMRegAllocPass(true));
addPass(&SILowerWWMCopiesID);
addPass(&SILowerWWMCopiesLegacyID);
addPass(createVirtRegRewriter(false));
addPass(&AMDGPUReserveWWMRegsID);

Expand Down
81 changes: 55 additions & 26 deletions llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
//
//===----------------------------------------------------------------------===//

#include "SILowerWWMCopies.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Expand All @@ -30,12 +31,30 @@ using namespace llvm;

namespace {

class SILowerWWMCopies : public MachineFunctionPass {
/// Pass-manager-independent implementation of the WWM copy lowering. Both the
/// legacy pass wrapper and the new pass manager pass construct one of these
/// per function and call run().
class SILowerWWMCopies {
public:
  /// Stores the supplied analyses as-is. Any of \p LIS, \p SI and \p VRM may
  /// be null when the corresponding analysis is not available.
  SILowerWWMCopies(LiveIntervals *LIS, SlotIndexes *SI, VirtRegMap *VRM)
      : LIS(LIS), Indexes(SI), VRM(VRM) {}

  /// Runs the lowering on \p MF.
  /// NOTE(review): the return value is presumably "function was modified";
  /// confirm against the body of run().
  bool run(MachineFunction &MF);

private:
  bool isSCCLiveAtMI(const MachineInstr &MI);
  void addToWWMSpills(MachineFunction &MF, Register Reg);

  // Optional analyses handed in by the caller; each may be null.
  LiveIntervals *LIS;
  SlotIndexes *Indexes;
  VirtRegMap *VRM;

  // Per-function state assigned by run(). Default to null so the members are
  // never left indeterminate between construction and the call to run().
  const SIRegisterInfo *TRI = nullptr;
  const MachineRegisterInfo *MRI = nullptr;
  SIMachineFunctionInfo *MFI = nullptr;
};

class SILowerWWMCopiesLegacy : public MachineFunctionPass {
public:
static char ID;

SILowerWWMCopies() : MachineFunctionPass(ID) {
initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry());
SILowerWWMCopiesLegacy() : MachineFunctionPass(ID) {
initializeSILowerWWMCopiesLegacyPass(*PassRegistry::getPassRegistry());
}

bool runOnMachineFunction(MachineFunction &MF) override;
Expand All @@ -49,31 +68,20 @@ class SILowerWWMCopies : public MachineFunctionPass {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}

private:
bool isSCCLiveAtMI(const MachineInstr &MI);
void addToWWMSpills(MachineFunction &MF, Register Reg);

LiveIntervals *LIS;
SlotIndexes *Indexes;
VirtRegMap *VRM;
const SIRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
SIMachineFunctionInfo *MFI;
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies",
INITIALIZE_PASS_BEGIN(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false,
false)
INITIALIZE_PASS_END(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
false, false)

char SILowerWWMCopies::ID = 0;
char SILowerWWMCopiesLegacy::ID = 0;

char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID;
char &llvm::SILowerWWMCopiesLegacyID = SILowerWWMCopiesLegacy::ID;

bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
// We can't determine the liveness info if LIS isn't available. Early return
Expand All @@ -93,23 +101,44 @@ void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
if (Reg.isPhysical())
return;

// FIXME: VRM may be null here.
MCRegister PhysReg = VRM->getPhys(Reg);
assert(PhysReg && "should have allocated a physical register");

MFI->allocateWWMSpill(MF, PhysReg);
}

bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) {
/// Legacy pass manager entry point: gathers whichever of the optional analyses
/// are available and forwards to the shared SILowerWWMCopies implementation.
bool SILowerWWMCopiesLegacy::runOnMachineFunction(MachineFunction &MF) {
  // Look up all wrapper passes first; each lookup yields null when the
  // analysis has not been scheduled.
  auto *LiveIntervalsWP = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  auto *SlotIndexesWP = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
  auto *VirtRegMapWP = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>();

  // Unwrap the results, propagating null for anything that is missing.
  auto *LiveInts = LiveIntervalsWP ? &LiveIntervalsWP->getLIS() : nullptr;
  auto *SlotIdx = SlotIndexesWP ? &SlotIndexesWP->getSI() : nullptr;
  auto *RegMap = VirtRegMapWP ? &VirtRegMapWP->getVRM() : nullptr;

  SILowerWWMCopies Lowering(LiveInts, SlotIdx, RegMap);
  return Lowering.run(MF);
}

/// New pass manager entry point: forwards to the shared SILowerWWMCopies
/// implementation using whatever analysis results are already cached for
/// \p MF, and reports every analysis as preserved.
PreservedAnalyses
SILowerWWMCopiesPass::run(MachineFunction &MF,
                          MachineFunctionAnalysisManager &MFAM) {
  // Only cached results are consulted; a missing analysis is passed on as
  // null rather than being computed here.
  auto *CachedVRM = MFAM.getCachedResult<VirtRegMapAnalysis>(MF);
  auto *CachedIndexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
  auto *CachedLIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);

  // The boolean result of run() is not used.
  SILowerWWMCopies Lowering(CachedLIS, CachedIndexes, CachedVRM);
  Lowering.run(MF);

  return PreservedAnalyses::all();
}

bool SILowerWWMCopies::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();

MFI = MF.getInfo<SIMachineFunctionInfo>();
auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>();
VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr;
TRI = ST.getRegisterInfo();
MRI = &MF.getRegInfo();

Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/SILowerWWMCopies.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===- SILowerWWMCopies.h ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H
#define LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {
/// New pass manager pass declaration for the SI lower-WWM-copies
/// transformation.
class SILowerWWMCopiesPass : public PassInfoMixin<SILowerWWMCopiesPass> {
public:
/// Runs the pass on \p MF; \p MFAM is the analysis manager for the machine
/// function.
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
};
} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H
43 changes: 43 additions & 0 deletions llvm/test/CodeGen/AMDGPU/si-lower-wwm-copies.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5

# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=liveintervals,virtregmap,si-lower-wwm-copies -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<live-intervals>,require<virtregmap>,si-lower-wwm-copies" -o - %s | FileCheck %s

# Check for two cases of $scc being live and dead.
---
name: lower-wwm-copies
registers:
- { id: 1, class: vgpr_32, flags: [ WWM_REG ]}
machineFunctionInfo:
sgprForEXECCopy: '$sgpr2_sgpr3'
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: lower-wwm-copies
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: S_CMP_EQ_U32 [[DEF]], 0, implicit-def $scc
; CHECK-NEXT: $sgpr2_sgpr3 = S_MOV_B64 killed $exec
; CHECK-NEXT: $exec = S_MOV_B64 -1
; CHECK-NEXT: $vgpr1 = COPY $vgpr0
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: $vgpr2 = COPY $vgpr1
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3
bb.0:
liveins: $vgpr0, $scc
%0:sgpr_32 = IMPLICIT_DEF
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have used a physreg for this operand. SGPR allocation is done by now as per the pass flow. It should still work with the SGPR virtual register though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the SGPRs should already have been allocated at this point.

S_CMP_EQ_U32 %0, 0, implicit-def $scc
$vgpr1 = WWM_COPY $vgpr0
S_CBRANCH_SCC1 %bb.1, implicit killed $scc

bb.1:
liveins: $vgpr1
$vgpr2 = WWM_COPY $vgpr1
Loading