Skip to content

Commit e2901f1

Browse files
authored
[AMDGPU] Adjust VGPR allocation encoding on gfx1250 (#156546)
1 parent 2b70ad2 commit e2901f1

File tree

2 files changed

+31
-3
lines changed

2 files changed

+31
-3
lines changed

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1376,6 +1376,9 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
13761376
? *EnableWavefrontSize32
13771377
: STI->getFeatureBits().test(FeatureWavefrontSize32);
13781378

1379+
if (STI->getFeatureBits().test(Feature1024AddressableVGPRs))
1380+
return IsWave32 ? 16 : 8;
1381+
13791382
return IsWave32 ? 8 : 4;
13801383
}
13811384

llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
1-
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
1+
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM,W32 %s
22
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=4 -filetype=obj < %s > %t
33
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
44
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
5+
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+wavefrontsize64,-wavefrontsize32 --amdhsa-code-object-version=4 < %s 2>&1 | FileCheck --check-prefix=W64-ERR %s
56

67
// READOBJ: Section Headers
78
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
8-
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} 000540 {{[0-9a-f]+}} {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
9+
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} 000640 {{[0-9a-f]+}} {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
910

1011
// READOBJ: Relocation section '.rela.rodata' at offset
1112
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
@@ -20,11 +21,13 @@
2021
// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
2122
// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
2223
// READOBJ-NEXT: 0000000000000400 0 FUNC LOCAL PROTECTED 2 max_lds_size
24+
// READOBJ-NEXT: 0000000000000500 0 FUNC LOCAL PROTECTED 2 max_vgprs
2325
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
2426
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
2527
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
2628
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
2729
// READOBJ-NEXT: 0000000000000100 64 OBJECT LOCAL DEFAULT 3 max_lds_size.kd
30+
// READOBJ-NEXT: 0000000000000140 64 OBJECT LOCAL DEFAULT 3 max_vgprs.kd
2831

2932
// OBJDUMP: Contents of section .rodata
3033
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
@@ -37,7 +40,7 @@
3740
// OBJDUMP-NEXT: 0040 01000000 01000000 0c000000 00000000
3841
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
3942
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00c00000
40-
// OBJDUMP-NEXT: 0070 015021c4 410f007f 5e068200 00000000
43+
// OBJDUMP-NEXT: 0070 005021c4 410f007f 5e068200 00000000
4144
// special_sgpr
4245
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
4346
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
@@ -53,6 +56,11 @@
5356
// OBJDUMP-NEXT: 0110 00000000 00000000 00000000 00000000
5457
// OBJDUMP-NEXT: 0120 00000000 00000000 00000000 00000000
5558
// OBJDUMP-NEXT: 0130 00000cc0 80000000 00040000 00000000
59+
// max_vgprs
60+
// OBJDUMP-NEXT: 0140 00000000 00000000 00000000 00000000
61+
// OBJDUMP-NEXT: 0150 00000000 00000000 00000000 00000000
62+
// OBJDUMP-NEXT: 0160 00000000 00000000 00000000 00000000
63+
// OBJDUMP-NEXT: 0170 3f000cc0 80000000 00040000 00000000
5664

5765
.text
5866

@@ -84,6 +92,11 @@ disabled_user_sgpr:
8492
max_lds_size:
8593
s_endpgm
8694

95+
.p2align 8
96+
.type max_vgprs,@function
97+
max_vgprs:
98+
s_endpgm
99+
87100
.rodata
88101
// ASM: .rodata
89102

@@ -228,6 +241,18 @@ max_lds_size:
228241
// ASM: .amdhsa_group_segment_fixed_size 393216
229242
// ASM: .end_amdhsa_kernel
230243

244+
// Test maximum VGPR allocation
245+
246+
// ASM: .amdhsa_kernel max_vgprs
247+
// W32: .amdhsa_next_free_vgpr 1024
248+
// W64-ERR: error: value out of range
249+
// ASM: .end_amdhsa_kernel
250+
.p2align 6
251+
.amdhsa_kernel max_vgprs
252+
.amdhsa_next_free_vgpr 1024
253+
.amdhsa_next_free_sgpr 1
254+
.end_amdhsa_kernel
255+
231256
.section .foo
232257

233258
.byte .amdgcn.gfx_generation_number

0 commit comments

Comments
 (0)