Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ namespace llvm {

/// ProcessImplicitDefs pass - This pass removes IMPLICIT_DEFs.
extern char &ProcessImplicitDefsID;
extern char &MaxsMachineFunctionID;

/// RegisterCoalescer - This pass merges live ranges to eliminate copies.
extern char &RegisterCoalescerID;
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &);
void initializePrintFunctionPassWrapperPass(PassRegistry &);
void initializePrintModulePassWrapperPass(PassRegistry &);
void initializeProcessImplicitDefsPass(PassRegistry &);
void initializeMaxsMachineFunctionPass(PassRegistry &);
void initializeProfileSummaryInfoWrapperPassPass(PassRegistry &);
void initializePromoteLegacyPassPass(PassRegistry &);
void initializeRABasicPass(PassRegistry &);
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Passes/MachinePassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass)
DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass)
DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", MachineUniformityInfoPrinterPass)
DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass)
// DUMMY_MACHINE_FUNCTION_PASS("processimpdefsmaxs", MaxsMachineFunctionPass)
DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass)
DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass)
DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePostRASchedulerLegacyPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializeMaxsMachineFunctionPass(Registry);
initializeRABasicPass(Registry);
initializeRAGreedyLegacyPass(Registry);
initializeRegAllocFastPass(Registry);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1511,6 +1511,8 @@ void GCNPassConfig::addFastRegAlloc() {
TargetPassConfig::addFastRegAlloc();
}

extern FunctionPass *createMaxsMachineFunctionPass();

void GCNPassConfig::addOptimizedRegAlloc() {
if (EnableDCEInRA)
insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
Expand Down Expand Up @@ -1545,6 +1547,8 @@ void GCNPassConfig::addOptimizedRegAlloc() {
if (TM->getOptLevel() > CodeGenOptLevel::Less)
insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);

addPass(createMaxsMachineFunctionPass());

TargetPassConfig::addOptimizedRegAlloc();
}

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUUnifyDivergentExitNodes.cpp
AMDGPUUnifyMetadata.cpp
R600MachineCFGStructurizer.cpp
MaxsMachineFunction.cpp
GCNCreateVOPD.cpp
GCNDPPCombine.cpp
GCNHazardRecognizer.cpp
Expand Down
190 changes: 190 additions & 0 deletions llvm/lib/Target/AMDGPU/MaxsMachineFunction.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
//===----------------------- MaxsMachineFunction.cpp ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "maxsmachinefunction"

namespace {

// Hidden command-line switch "amdgpu-unpack-fops", off by default.
// runOnMachineFunction returns without touching the function unless it is set.
cl::opt<bool> UnpackFOps("amdgpu-unpack-fops", cl::desc("unpack f ops"),
                         cl::init(false), cl::Hidden);

/// Legacy pass-manager machine function pass. The transformation itself is
/// implemented out of line in runOnMachineFunction.
struct MaxsMachineFunction : MachineFunctionPass {
  /// Pass identifier; defined out of line.
  static char ID;

  MaxsMachineFunction() : MachineFunctionPass(ID) {
    // Registration is not triggered from the constructor; the call below is
    // intentionally left disabled.
    // initializeMaxsMachineFunctionPass(*PassRegistry::getPassRegistry());
  }

  /// Entry point invoked once per machine function; returns true when the
  /// function was modified.
  bool runOnMachineFunction(MachineFunction &MF) override;

  /// Declares which analyses this pass preserves.
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// The pass requires its input to have the IsSSA property.
  MachineFunctionProperties getRequiredProperties() const override {
    MachineFunctionProperties Required;
    Required.set(MachineFunctionProperties::Property::IsSSA);
    return Required;
  }
};
} // end anonymous namespace

// Storage for the pass identifier; its address is handed to the
// MachineFunctionPass base class by the constructor.
char MaxsMachineFunction::ID = 0;
// Reference to the identifier exported in namespace llvm (declared `extern`
// in llvm/CodeGen/Passes.h).
char &llvm::MaxsMachineFunctionID = MaxsMachineFunction::ID;

static void initializeMaxsMachineFunctionPassOnce(PassRegistry &Registry) {
PassInfo *PI = new PassInfo(
"MaxsMachineFunction", "maxsmachinefunction", &MaxsMachineFunction::ID,
PassInfo::NormalCtor_t(callDefaultCtor<MaxsMachineFunction>), false,
false);
Registry.registerPass(*PI, true);
}

static llvm::once_flag InitializeMaxsMachineFunctionPassFlag;

void llvm::initializeMaxsMachineFunctionPass(PassRegistry &Registry) {
llvm::call_once(InitializeMaxsMachineFunctionPassFlag,
initializeMaxsMachineFunctionPassOnce, std::ref(Registry));
}

/// Factory for the pass: returns a newly heap-allocated MaxsMachineFunction.
/// Declared `extern` in AMDGPUTargetMachine.cpp, where the result is handed
/// to addPass() in GCNPassConfig::addOptimizedRegAlloc.
FunctionPass *createMaxsMachineFunctionPass() {
return new MaxsMachineFunction();
}

/// Reports the analyses this pass keeps valid: alias-analysis results and
/// every analysis that depends only on the CFG. The base class then adds the
/// requirements common to all machine function passes.
void MaxsMachineFunction::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addPreserved<AAResultsWrapperPass>();
  AU.setPreservesCFG();
  MachineFunctionPass::getAnalysisUsage(AU);
}

/// Walks forward from \p MI (inclusive) to the end of its basic block and
/// returns the instruction at which the running count of instructions that
/// appear on the use list of \p CRReg reaches \p N.
///
/// \param MI    Instruction at which the scan starts.
/// \param MRI   Register info whose use list of \p CRReg is consulted.
/// \param CRReg Register whose users are counted.
/// \param N     Which user to return; with N == 0 the scan stops at \p MI
///              itself because the count already matches.
/// \returns the matching instruction, or an empty optional when the block
///          ends before the count reaches \p N.
std::optional<MachineInstr *> findNthUser(MachineInstr &MI,
                                          MachineRegisterInfo *MRI,
                                          const Register &CRReg,
                                          unsigned N = 1) {
  MachineBasicBlock::iterator Cur = MI;
  const MachineBasicBlock::iterator BlockEnd = MI.getParent()->end();

  unsigned UsersSeen = 0;
  while (Cur != BlockEnd) {
    MachineInstr *Candidate = &*Cur;
    // Count how often the candidate shows up among the register's users.
    UsersSeen += llvm::count_if(
        MRI->use_instructions(CRReg),
        [Candidate](const MachineInstr &User) { return &User == Candidate; });
    if (UsersSeen == N)
      return Candidate;
    ++Cur;
  }
  return std::nullopt;
}

bool MaxsMachineFunction::runOnMachineFunction(MachineFunction &MF) {

LLVM_DEBUG(dbgs() << "********** MaxsMachineFunction **********\n"
<< "********** Function: " << MF.getName() << '\n');

bool Changed = false;
if (!UnpackFOps)
return Changed;

MachineRegisterInfo *MRI = &MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();

SmallVector<MachineInstr *> toRemove;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() == AMDGPU::V_PK_ADD_F32) {
MachineOperand &OldDest = MI.getOperand(0);
MachineOperand &Lhs = MI.getOperand(2);
MachineOperand &Rhs = MI.getOperand(4);

auto lhsLow = MachineOperand::CreateReg(
Lhs.getReg(), Lhs.isDef(), Lhs.isImplicit(), Lhs.isKill(),
Lhs.isDead(), Lhs.isUndef(), Lhs.isEarlyClobber(), AMDGPU::sub0,
Lhs.isDebug(), Lhs.isInternalRead());

auto rhsLow = MachineOperand::CreateReg(
Rhs.getReg(), Rhs.isDef(), Rhs.isImplicit(), Rhs.isKill(),
Rhs.isDead(), Rhs.isUndef(), Rhs.isEarlyClobber(), AMDGPU::sub0,
Rhs.isDebug(), Rhs.isInternalRead());

Register DstReg1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

MachineInstrBuilder MIB =
BuildMI(MBB, MI, {}, TII->get(AMDGPU::V_ADD_F32_e32), DstReg1)
.add({lhsLow, rhsLow});
if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
(void)MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept);

auto lhsHigh = MachineOperand::CreateReg(
Lhs.getReg(), Lhs.isDef(), Lhs.isImplicit(), Lhs.isKill(),
Lhs.isDead(), Lhs.isUndef(), Lhs.isEarlyClobber(), AMDGPU::sub1,
Lhs.isDebug(), Lhs.isInternalRead());

auto rhsHigh = MachineOperand::CreateReg(
Rhs.getReg(), Rhs.isDef(), Rhs.isImplicit(), Rhs.isKill(),
Rhs.isDead(), Rhs.isUndef(), Rhs.isEarlyClobber(), AMDGPU::sub1,
Rhs.isDebug(), Rhs.isInternalRead());

Register DstReg2 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

MIB = BuildMI(MBB, MI, {}, TII->get(AMDGPU::V_ADD_F32_e32), DstReg2)
.add({lhsHigh, rhsHigh});
if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
(void)MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept);

Register nextOperand;
std::optional<MachineInstr *> I;
if (I = findNthUser(MI, MRI, OldDest.getReg()); *I) {
nextOperand = (*I)->getOperand(0).getReg();
(*I)->getOperand(1).ChangeToRegister(DstReg2, /*isDef*/ false);
}

Register DstReg3 =
MRI->createVirtualRegister(&AMDGPU::VReg_64_Align2RegClass);

auto reqSeq = BuildMI(MBB, *I, {}, TII->get(AMDGPU::REG_SEQUENCE))
.addDef(DstReg3)
.addUse(DstReg1)
.addImm(AMDGPU::sub0)
.addUse(DstReg2)
.addImm(AMDGPU::sub1)
.getInstr();

if (auto I = findNthUser(MI, MRI, nextOperand, 2)) {
reqSeq->getOperand(0).dump();
(*I)->getOperand(1).ChangeToRegister(reqSeq->getOperand(0).getReg(),
/*isDef*/ false);
}

toRemove.push_back(&MI);
Changed = true;
} else if (MI.getOpcode() == AMDGPU::V_PK_MUL_F32) {
MI.dump();
}
}
}
for (auto remove : toRemove)
remove->eraseFromParent();
if (Changed)
MF.dump();
return Changed;
}
Loading