Skip to content
4 changes: 4 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1771,6 +1771,10 @@ The AMDGPU backend supports the following LLVM IR attributes.
using dedicated instructions, but may not send the DEALLOC_VGPRS
message. If a shader has this attribute, then all its callees must
match its value.
An amdgpu_cs_chain CC function with this enabled has an extra
symbol prefixed with "_dvgpr$" whose value is the function symbol
plus an offset that encodes, in bits 5..3, one less than the
number of dynamic VGPR blocks required by the function.

================================================ ==========================================================

Expand Down
47 changes: 47 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
Expand Down Expand Up @@ -729,6 +730,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutContext, IsLocal));
}

// Emit _dvgpr$ symbol when appropriate.
emitDVgprSymbol(MF);

if (isVerbose()) {
MCSectionELF *CommentSection =
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
Expand Down Expand Up @@ -871,6 +875,49 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}

// Emit the companion "_dvgpr$<function>" symbol for a function, if needed.
//
// The symbol is only produced for functions that have dynamic VGPRs enabled
// and use the amdgpu_cs_chain calling convention. Its value is the function
// symbol plus (NumBlocks - 1) << 3, i.e. one less than the number of VGPR
// blocks the function uses, placed in bits 5..3. A "VGPR block" is either
// 16 VGPRs (for a max of 128) or 32 VGPRs (for a max of 256). A front-end
// uses this for functions that are chained rather than called, together with
// a dispatcher that dynamically resizes the VGPR count before dispatching to
// a function.
//
// If more than 8 blocks would be needed, an error is reported on the MC
// context and no symbol is emitted.
void AMDGPUAsmPrinter::emitDVgprSymbol(MachineFunction &MF) {
  const SIMachineFunctionInfo &FuncInfo = *MF.getInfo<SIMachineFunctionInfo>();

  // Nothing to do unless this is a dynamic-VGPR chain function.
  if (!FuncInfo.isDynamicVGPREnabled())
    return;
  if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_CS_Chain)
    return;

  MCContext &Ctx = MF.getContext();
  const unsigned VGPRsPerBlock = FuncInfo.getDynamicVGPRBlockSize();

  // The VGPR count is held as an MCExpr; it must fold to a plain constant.
  MCValue VGPRCount;
  const bool Evaluated =
      CurrentProgramInfo.NumVGPRsForWavesPerEU->evaluateAsRelocatable(
          VGPRCount, nullptr);
  if (!Evaluated || !VGPRCount.isAbsolute())
    llvm_unreachable("unable to resolve NumVGPRs for _dvgpr$ symbol");

  // Round the VGPR count up to whole blocks. A count of 0 is bumped to 1 so
  // that the "blocks - 1" encoding below cannot underflow.
  const unsigned VGPRsUsed =
      std::max(static_cast<unsigned>(VGPRCount.getConstant()), 1U);
  const unsigned BlockCount = (VGPRsUsed + VGPRsPerBlock - 1) / VGPRsPerBlock;

  // Only 3 bits are available for the encoding, so at most 8 blocks fit.
  if (BlockCount > 8) {
    OutContext.reportError({},
                           "too many DVGPR blocks for _dvgpr$ symbol for '" +
                               Twine(CurrentFnSym->getName()) + "'");
    return;
  }

  // Place (blocks - 1) in bits 5..3 and add it to the function's address.
  const unsigned BlockBits = (BlockCount - 1) << 3;
  const MCExpr *FuncAddr = MCSymbolRefExpr::create(CurrentFnSym, Ctx);
  const MCExpr *BlockOffset = MCConstantExpr::create(BlockBits, Ctx);
  const MCExpr *SymValue = MCBinaryExpr::createAdd(FuncAddr, BlockOffset, Ctx);

  // Define the symbol and give it the function's visibility and linkage.
  MCSymbol *Sym =
      Ctx.getOrCreateSymbol(Twine("_dvgpr$") + CurrentFnSym->getName());
  OutStreamer->emitAssignment(Sym, SymValue);
  emitVisibility(Sym, MF.getFunction().getVisibility());
  emitLinkage(&MF.getFunction(), Sym);
}

// TODO: Fold this into emitFunctionBodyStart.
void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
// In the beginning all features are either 'Any' or 'NotSupported',
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter {

MCCodeEmitter *DumpCodeInstEmitter = nullptr;

// When appropriate, add a _dvgpr$ symbol.
void emitDVgprSymbol(MachineFunction &MF);

void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
const SIProgramInfo &KernelInfo,
Expand Down
62 changes: 62 additions & 0 deletions llvm/test/CodeGen/AMDGPU/dvgpr_sym.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
; Test generation of _dvgpr$ symbol for an amdgpu_cs_chain function with dynamic vgprs.

; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=DVGPR %s

; Function with 0 VGPRs, which counts as 1 block.
;
; DVGPR-LABEL: func0:
; DVGPR: .set _dvgpr$func0, func0+0
;
define amdgpu_cs_chain void @func0() #0 {
ret void
}

; Function with 21 VGPRs, which is 2 blocks.
;
; DVGPR-LABEL: func21:
; DVGPR: .set _dvgpr$func21, func21+8
;
define amdgpu_cs_chain void @func21(<21 x float> %arg) #0 {
ret void
}

; Anonymous function with 87 VGPRs, which is 6 blocks.
;
; DVGPR: [[FUNC87:__unnamed[^:]*]]:
; DVGPR: .set _dvgpr$[[FUNC87]], [[FUNC87]]+40
;
define amdgpu_cs_chain void @0(<87 x float> %arg) #0 {
ret void
}

; Function with 128 VGPRs, which is 8 blocks.
;
; DVGPR-LABEL: func128:
; DVGPR: .set _dvgpr$func128, func128+56
;
define amdgpu_cs_chain void @func128(<128 x float> %arg) #0 {
%vec87 = shufflevector <128 x float> %arg, <128 x float> %arg, <87 x i32> splat(i32 0)
tail call void @0(<87 x float> %vec87)
ret void
}

; Function with 79 VGPRs, which is 3 blocks with a block size of 32.
;
; DVGPR-LABEL: func79:
; DVGPR: .set _dvgpr$func79, func79+16
;
define amdgpu_cs_chain void @func79(<79 x float> %arg) #1 {
ret void
}

; Function with 225 VGPRs, which is 8 blocks with a block size of 32.
;
; DVGPR-LABEL: func225:
; DVGPR: .set _dvgpr$func225, func225+56
;
define amdgpu_cs_chain void @func225(<225 x float> %arg) #1 {
ret void
}

attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #1 = { "amdgpu-dynamic-vgpr-block-size"="32" }
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AMDGPU/dvgpr_sym_fail_too_many_block_size_16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; Test failure to generate the _dvgpr$ symbol for an amdgpu_cs_chain function with dynamic vgprs.

; RUN: not llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s 2>&1 | FileCheck -check-prefixes=ERR %s

define amdgpu_cs_chain void @0(<87 x float> %arg) #0 {
ret void
}

; Function with 129 VGPRs, which is too many with a block size of 16.
;
; ERR: too many DVGPR blocks for _dvgpr$ symbol for 'func129'
;
define amdgpu_cs_chain void @func129(<129 x float> %arg) #0 {
%vec87 = shufflevector <129 x float> %arg, <129 x float> %arg, <87 x i32> splat(i32 0)
tail call void @0(<87 x float> %vec87)
ret void
}

attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }