Skip to content
72 changes: 49 additions & 23 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
Expand Down Expand Up @@ -194,29 +195,6 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
return;
}

if (STM.isDynamicVGPREnabled() &&
MF->getFunction().getCallingConv() == CallingConv::AMDGPU_CS_Chain) {
// Add a _dvgpr$ symbol, with the value of the function symbol, plus an
// offset encoding one less than the number of VGPR blocks used by the
// function (16 VGPRs per block, no more than 128) in bits 5..3 of the
// symbol value. This is used by a front-end to have functions that are
// chained rather than called, and a dispatcher that dynamically resizes
// the VGPR count before dispatching to a function.
MCContext &Ctx = MF->getContext();
unsigned EncodedNumVGPRs = (ResourceUsage->NumVGPR - 1) >> 1 & 0x38;
MCSymbol *CurPCSym = Ctx.createTempSymbol();
OutStreamer->emitLabel(CurPCSym);
const MCExpr *DVgprFuncVal = MCBinaryExpr::createAdd(
MCSymbolRefExpr::create(CurPCSym, Ctx),
MCConstantExpr::create(EncodedNumVGPRs, Ctx), Ctx);
MCSymbol *DVgprFuncSym =
Ctx.getOrCreateSymbol(Twine("_dvgpr$") + MF->getFunction().getName());
OutStreamer->emitAssignment(DVgprFuncSym, DVgprFuncVal);
cast<MCSymbolELF>(DVgprFuncSym)
->setBinding(
cast<MCSymbolELF>(getSymbol(&MF->getFunction()))->getBinding());
}

if (!MFI.isEntryFunction())
return;

Expand Down Expand Up @@ -748,6 +726,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutContext, IsLocal));
}

// Emit _dvgpr$ symbol when appropriate.
emitDVgprSymbol(MF);

if (isVerbose()) {
MCSectionELF *CommentSection =
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
Expand Down Expand Up @@ -890,6 +871,51 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}

// Emits the _dvgpr$<function> companion symbol for MF when one is needed.
//
// The symbol is only produced for amdgpu_cs_chain functions that have dynamic
// VGPRs enabled. Its value is the function symbol plus an offset that encodes
// one less than the number of VGPR blocks used by the function in bits 5..3.
// A "VGPR block" is either 16 VGPRs (for a max of 128) or 32 VGPRs (for a max
// of 256). A front-end uses this to have functions that are chained rather
// than called, together with a dispatcher that dynamically resizes the VGPR
// count before dispatching to a function.
//
// An error is reported on the output context, and no symbol is emitted, when
// the VGPR count does not resolve to an absolute value or when it needs more
// than 8 blocks.
void AMDGPUAsmPrinter::emitDVgprSymbol(MachineFunction &MF) {
  const SIMachineFunctionInfo &FuncInfo = *MF.getInfo<SIMachineFunctionInfo>();

  // Nothing to do unless this is a chain function using dynamic VGPRs.
  if (!FuncInfo.isDynamicVGPREnabled() ||
      MF.getFunction().getCallingConv() != CallingConv::AMDGPU_CS_Chain)
    return;

  // The VGPR count is stored as an expression; it has to fold to a constant
  // before it can be encoded.
  MCValue ResolvedVGPRs;
  const bool Evaluated =
      CurrentProgramInfo.NumVGPRsForWavesPerEU->evaluateAsRelocatable(
          ResolvedVGPRs, nullptr);
  if (!Evaluated || !ResolvedVGPRs.isAbsolute()) {
    OutContext.reportError({}, "Unable to resolve _dvgpr$ symbol for '" +
                                   Twine(MF.getName()) + "'");
    return;
  }

  // Round the VGPR count up to whole blocks. A count of 0 is treated as 1 so
  // that the "blocks - 1" encoding below cannot underflow.
  const unsigned VGPRsPerBlock = FuncInfo.getDynamicVGPRBlockSize();
  const unsigned VGPRCount =
      std::max(static_cast<unsigned>(ResolvedVGPRs.getConstant()), 1U);
  const unsigned BlockCount = (VGPRCount + VGPRsPerBlock - 1) / VGPRsPerBlock;

  // Bits 5..3 hold BlockCount - 1, so at most 8 blocks can be represented.
  if (BlockCount > 8) {
    OutContext.reportError({},
                           "Too many DVGPR blocks for _dvgpr$ symbol for '" +
                               Twine(MF.getName()) + "'");
    return;
  }
  const unsigned BlockField = (BlockCount - 1) << 3;

  // _dvgpr$<name> = <function symbol> + encoded block count.
  MCContext &Ctx = MF.getContext();
  const MCExpr *SymbolValue = MCBinaryExpr::createAdd(
      MCSymbolRefExpr::create(CurrentFnSym, Ctx),
      MCConstantExpr::create(BlockField, Ctx), Ctx);
  MCSymbol *DVgprSym =
      Ctx.getOrCreateSymbol(Twine("_dvgpr$") + CurrentFnSym->getName());
  OutStreamer->emitAssignment(DVgprSym, SymbolValue);

  // Emit the function's visibility and linkage for the new symbol as well.
  emitVisibility(DVgprSym, MF.getFunction().getVisibility());
  emitLinkage(&MF.getFunction(), DVgprSym);
}

// TODO: Fold this into emitFunctionBodyStart.
void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
// In the beginning all features are either 'Any' or 'NotSupported',
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter {

MCCodeEmitter *DumpCodeInstEmitter = nullptr;

// When appropriate, add a _dvgpr$ symbol for \p MF. The symbol is only
// emitted for amdgpu_cs_chain functions with dynamic VGPRs enabled; its value
// is the function symbol plus one less than the number of VGPR blocks used,
// encoded in bits 5..3. Reports an error instead if the VGPR count cannot be
// resolved or does not fit in 8 blocks.
void emitDVgprSymbol(MachineFunction &MF);

void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
const SIProgramInfo &KernelInfo,
Expand Down
62 changes: 56 additions & 6 deletions llvm/test/CodeGen/AMDGPU/dvgpr_sym.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,62 @@
; Test generation of _dvgpr$ symbol for an amdgpu_cs_chain function with +dynamic-vgpr.

; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -asm-verbose=0 < %s | FileCheck -check-prefixes=DVGPR %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=DVGPR %s

; DVGPR-LABEL: func:
; DVGPR: .Ltmp0:
; DVGPR: .set _dvgpr$func, .Ltmp0+{{[0-9]+}}
; Function with 0 VGPRs, which counts as 1 block.
;
; DVGPR-LABEL: func0:
; DVGPR: .set _dvgpr$func0, func0+0
;
define amdgpu_cs_chain void @func0() #0 {
; No arguments and nothing but a return: the minimal case, expected to encode
; an offset of 0 (a single block).
ret void
}

; Function with 21 VGPRs, which is 2 blocks.
;
; DVGPR-LABEL: func21:
; DVGPR: .set _dvgpr$func21, func21+8
;
define amdgpu_cs_chain void @func21(<21 x float> %arg) #0 {
; NOTE(review): presumably each float of %arg occupies one VGPR, which is
; where the 21 VGPRs come from - confirm against the amdgpu_cs_chain calling
; convention. With 16-VGPR blocks that rounds up to 2 blocks, i.e. (2-1)<<3 = 8.
ret void
}

; Anonymous function with 87 VGPRs, which is 6 blocks.
;
; DVGPR: [[FUNC87:__unnamed[^:]*]]:
; DVGPR: .set _dvgpr$[[FUNC87]], [[FUNC87]]+40
;
define amdgpu_cs_chain void @0(<87 x float> %arg) #0 {
; Unnamed function: the check lines above capture whatever label the printer
; assigns to it and expect that same name after the _dvgpr$ prefix.
ret void
}

define amdgpu_cs_chain void @func() #0 {
; Function with 128 VGPRs, which is 8 blocks.
;
; DVGPR-LABEL: func128:
; DVGPR: .set _dvgpr$func128, func128+56
;
define amdgpu_cs_chain void @func128(<128 x float> %arg) #0 {
; Build an <87 x float> whose lanes all select element 0 of %arg (the mask is
; splat(i32 0)), then tail-call the anonymous function @0 with it.
%vec87 = shufflevector <128 x float> %arg, <128 x float> %arg, <87 x i32> splat(i32 0)
tail call void @0(<87 x float> %vec87)
ret void
}
attributes #0 = { "target-features"="+dynamic-vgpr" }

; Function with 79 VGPRs, which is 3 blocks with a block size of 32.
;
; DVGPR-LABEL: func79:
; DVGPR: .set _dvgpr$func79, func79+16
;
define amdgpu_cs_chain void @func79(<79 x float> %arg) #1 {
; Uses attribute group #1, which requests a dynamic VGPR block size of 32
; instead of 16; 79 rounds up to 3 blocks, i.e. (3-1)<<3 = 16.
ret void
}

; Function with 225 VGPRs, which is 8 blocks with a block size of 32.
;
; DVGPR-LABEL: func225:
; DVGPR: .set _dvgpr$func225, func225+56
;
define amdgpu_cs_chain void @func225(<225 x float> %arg) #1 {
; Uses attribute group #1 (block size 32); 225 rounds up to 8 blocks, the
; largest count the 3-bit field can encode: (8-1)<<3 = 56.
ret void
}

; Attribute groups selecting the dynamic VGPR block size used by the
; functions in this file: #0 requests 16-VGPR blocks, #1 requests 32.
attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #1 = { "amdgpu-dynamic-vgpr-block-size"="32" }
Loading