Skip to content

Commit f279c47

Browse files
authored
AMDGPU gfx12: Add _dvgpr$ symbols for dynamic VGPRs (#148251)
For each function with the AMDGPU_CS_Chain calling convention, with dynamic VGPRs enabled, add a _dvgpr$ symbol, with the value of the function symbol, plus an offset encoding one less than the number of VGPR blocks used by the function (16 or 32 VGPRs per block, at most 8 blocks, i.e. no more than 128 or 256 VGPRs respectively) in bits 5..3 of the symbol value. This is used by a front-end to have functions that are chained rather than called, and a dispatcher that dynamically resizes the VGPR count before dispatching to a function.
1 parent 0b04168 commit f279c47

File tree

6 files changed

+163
-0
lines changed

6 files changed

+163
-0
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,6 +1771,10 @@ The AMDGPU backend supports the following LLVM IR attributes.
17711771
using dedicated instructions, but may not send the DEALLOC_VGPRS
17721772
message. If a shader has this attribute, then all its callees must
17731773
match its value.
1774+
An amdgpu_cs_chain CC function with this enabled has an extra symbol
1775+
prefixed with "_dvgpr$" with the value of the function symbol,
1776+
offset by one less than the number of dynamic VGPR blocks required
1777+
by the function encoded in bits 5..3.
17741778

17751779
================================================ ==========================================================
17761780

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "llvm/MC/MCContext.h"
4242
#include "llvm/MC/MCSectionELF.h"
4343
#include "llvm/MC/MCStreamer.h"
44+
#include "llvm/MC/MCValue.h"
4445
#include "llvm/MC/TargetRegistry.h"
4546
#include "llvm/Support/AMDHSAKernelDescriptor.h"
4647
#include "llvm/Support/Compiler.h"
@@ -733,6 +734,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
733734
OutContext, IsLocal));
734735
}
735736

737+
// Emit _dvgpr$ symbol when appropriate.
738+
emitDVgprSymbol(MF);
739+
736740
if (isVerbose()) {
737741
MCSectionELF *CommentSection =
738742
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
@@ -875,6 +879,49 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
875879
return false;
876880
}
877881

882+
// When appropriate, add a _dvgpr$ symbol, with the value of the function
883+
// symbol, plus an offset encoding one less than the number of VGPR blocks used
884+
// by the function in bits 5..3 of the symbol value. A "VGPR block" can be
885+
// either 16 VGPRs (for a max of 128), or 32 VGPRs (for a max of 256). This is
886+
// used by a front-end to have functions that are chained rather than called,
887+
// and a dispatcher that dynamically resizes the VGPR count before dispatching
888+
// to a function.
889+
void AMDGPUAsmPrinter::emitDVgprSymbol(MachineFunction &MF) {
890+
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
891+
if (MFI.isDynamicVGPREnabled() &&
892+
MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS_Chain) {
893+
MCContext &Ctx = MF.getContext();
894+
unsigned BlockSize = MFI.getDynamicVGPRBlockSize();
895+
MCValue NumVGPRs;
896+
if (!CurrentProgramInfo.NumVGPRsForWavesPerEU->evaluateAsRelocatable(
897+
NumVGPRs, nullptr) ||
898+
!NumVGPRs.isAbsolute()) {
899+
llvm_unreachable("unable to resolve NumVGPRs for _dvgpr$ symbol");
900+
}
901+
// Calculate number of VGPR blocks.
902+
// Treat 0 VGPRs as 1 VGPR to avoid underflowing.
903+
unsigned NumBlocks =
904+
divideCeil(std::max(unsigned(NumVGPRs.getConstant()), 1U), BlockSize);
905+
906+
if (NumBlocks > 8) {
907+
OutContext.reportError({},
908+
"too many DVGPR blocks for _dvgpr$ symbol for '" +
909+
Twine(CurrentFnSym->getName()) + "'");
910+
return;
911+
}
912+
unsigned EncodedNumBlocks = (NumBlocks - 1) << 3;
913+
// Add to function symbol to create _dvgpr$ symbol.
914+
const MCExpr *DVgprFuncVal = MCBinaryExpr::createAdd(
915+
MCSymbolRefExpr::create(CurrentFnSym, Ctx),
916+
MCConstantExpr::create(EncodedNumBlocks, Ctx), Ctx);
917+
MCSymbol *DVgprFuncSym =
918+
Ctx.getOrCreateSymbol(Twine("_dvgpr$") + CurrentFnSym->getName());
919+
OutStreamer->emitAssignment(DVgprFuncSym, DVgprFuncVal);
920+
emitVisibility(DVgprFuncSym, MF.getFunction().getVisibility());
921+
emitLinkage(&MF.getFunction(), DVgprFuncSym);
922+
}
923+
}
924+
878925
// TODO: Fold this into emitFunctionBodyStart.
879926
void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
880927
// In the beginning all features are either 'Any' or 'NotSupported',

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
5454

5555
MCCodeEmitter *DumpCodeInstEmitter = nullptr;
5656

57+
// When appropriate, add a _dvgpr$ symbol.
58+
void emitDVgprSymbol(MachineFunction &MF);
59+
5760
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
5861
void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
5962
const SIProgramInfo &KernelInfo,

llvm/test/CodeGen/AMDGPU/dvgpr_sym.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; Test generation of _dvgpr$ symbol for an amdgpu_cs_chain function with dynamic vgprs.
2+
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=DVGPR %s
4+
5+
; Function with 0 VGPRs, which counts as 1 block.
6+
;
7+
; DVGPR-LABEL: func0:
8+
; DVGPR: .set _dvgpr$func0, func0+0
9+
;
10+
define amdgpu_cs_chain void @func0() #0 {
11+
ret void
12+
}
13+
14+
; Function with 21 VGPRs, which is 2 blocks.
15+
;
16+
; DVGPR-LABEL: func21:
17+
; DVGPR: .set func21.num_vgpr, 21
18+
; DVGPR: .set _dvgpr$func21, func21+8
19+
;
20+
define amdgpu_cs_chain void @func21(<13 x float> %arg) #0 {
21+
tail call void @func21(<13 x float> %arg)
22+
ret void
23+
}
24+
25+
; Anonymous function with 87 VGPRs, which is 6 blocks.
26+
;
27+
; DVGPR: [[FUNC87:__unnamed[^:]*]]:
28+
; DVGPR: .set [[FUNC87]].num_vgpr, 87
29+
; DVGPR: .set _dvgpr$[[FUNC87]], [[FUNC87]]+40
30+
;
31+
define amdgpu_cs_chain void @0(<79 x float> %arg) #0 {
32+
tail call void @0(<79 x float> %arg)
33+
ret void
34+
}
35+
36+
; Function with 128 VGPRs, which is 8 blocks.
37+
;
38+
; DVGPR-LABEL: func128:
39+
; DVGPR: .set func128.num_vgpr, 128
40+
; DVGPR: .set _dvgpr$func128, func128+56
41+
;
42+
define amdgpu_cs_chain void @func128(<120 x float> %arg) #0 {
43+
tail call void @func128(<120 x float> %arg)
44+
ret void
45+
}
46+
47+
; Function with 79 VGPRs, which is 3 blocks with a block size of 32.
48+
;
49+
; DVGPR-LABEL: func79:
50+
; DVGPR: .set func79.num_vgpr, 79
51+
; DVGPR: .set _dvgpr$func79, func79+16
52+
;
53+
define amdgpu_cs_chain void @func79(<71 x float> %arg) #1 {
54+
tail call void @func79(<71 x float> %arg)
55+
ret void
56+
}
57+
58+
; Function with 225 VGPRs, which is 8 blocks with a block size of 32.
59+
;
60+
; DVGPR-LABEL: func225:
61+
; DVGPR: .set func225.num_vgpr, 225
62+
; DVGPR: .set _dvgpr$func225, func225+56
63+
;
64+
define amdgpu_cs_chain void @func225(<217 x float> %arg) #1 {
65+
tail call void @func225(<217 x float> %arg)
66+
ret void
67+
}
68+
69+
attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }
70+
attributes #1 = { "amdgpu-dynamic-vgpr-block-size"="32" }
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; Test failure to generate the _dvgpr$ symbol for an amdgpu_cs_chain function with dynamic vgprs.
2+
3+
; RUN: not llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s 2>&1 | FileCheck -check-prefixes=ERR %s
4+
5+
; Function with 129 VGPRs, which is too many with a block size of 16.
6+
;
7+
; ERR-DAG: .set func129.num_vgpr, 129
8+
; ERR-DAG: too many DVGPR blocks for _dvgpr$ symbol for 'func129'
9+
;
10+
define amdgpu_cs_chain void @func129(<121 x float> %arg) #0 {
11+
tail call void @func129(<121 x float> %arg)
12+
ret void
13+
}
14+
15+
attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; Test failure to generate the _dvgpr$ symbol for an anonymous amdgpu_cs_chain function with dynamic vgprs.
2+
3+
; RUN: not llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s 2>&1 | FileCheck -check-prefixes=ERR %s
4+
5+
; Anonymous function with 129 VGPRs, which is too many with a block size of 16.
6+
;
7+
; ERR-DAG: .set __unnamed_1.num_vgpr, 129
8+
; ERR-DAG: too many DVGPR blocks for _dvgpr$ symbol for '__unnamed_1'
9+
;
10+
define amdgpu_cs_chain void @0(<121 x float> %arg) #0 {
11+
tail call void @0(<121 x float> %arg)
12+
ret void
13+
}
14+
15+
; Function that is OK, that chains to @0.
16+
;
17+
define amdgpu_cs_chain void @funcOk(<16 x float> %arg) {
18+
%vec87 = shufflevector <16 x float> %arg, <16 x float> %arg, <121 x i32> splat(i32 0)
19+
tail call void @0(<121 x float> %vec87)
20+
ret void
21+
}
22+
23+
attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }
24+

0 commit comments

Comments
 (0)