Skip to content

Commit 7050d5b

Browse files
committed
[AMDGPU] Limit GFX11 to using 128 VGPRs
This is a temporary measure to avoid generating incorrect code until the compiler understands the new way that GFX11 encodes 16-bit operands in VOP instructions.

Differential Revision: https://reviews.llvm.org/D128054
1 parent 132234f commit 7050d5b

File tree

2 files changed

+20
-6
lines changed

2 files changed

+20
-6
lines changed

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ static llvm::cl::opt<unsigned>
3333
llvm::cl::desc("AMDHSA Code Object Version"),
3434
llvm::cl::init(4));
3535

36+
// TODO-GFX11: Remove this when full 16-bit codegen is implemented.
37+
static llvm::cl::opt<bool>
38+
LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden,
39+
llvm::cl::desc("Never use more than 128 VGPRs"));
40+
3641
namespace {
3742

3843
/// \returns Bit mask for given bit \p Shift and bit \p Width.
@@ -795,6 +800,15 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
795800
}
796801

797802
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
803+
if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs
804+
: isGFX11Plus(*STI)) {
805+
// GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions
806+
// such that values 128..255 no longer mean v128..v255, they mean
807+
// v0.hi..v127.hi instead. Until the compiler understands this, it is not
808+
// safe to use v128..v255.
809+
// TODO-GFX11: Remove this when full 16-bit codegen is implemented.
810+
return 128;
811+
}
798812
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
799813
return 512;
800814
return 256;

llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -536,10 +536,10 @@ define internal void @use256vgprs() {
536536
; GFX10WGP-WAVE64: NumVgprs: 256
537537
; GFX10CU-WAVE32: NumVgprs: 256
538538
; GFX10CU-WAVE64: NumVgprs: 256
539-
; GFX11WGP-WAVE32: NumVgprs: 256
540-
; GFX11WGP-WAVE64: NumVgprs: 256
541-
; GFX11CU-WAVE32: NumVgprs: 256
542-
; GFX11CU-WAVE64: NumVgprs: 256
539+
; GFX11WGP-WAVE32: NumVgprs: 128
540+
; GFX11WGP-WAVE64: NumVgprs: 128
541+
; GFX11CU-WAVE32: NumVgprs: 128
542+
; GFX11CU-WAVE64: NumVgprs: 128
543543
define amdgpu_kernel void @f256() #256 {
544544
call void @use256vgprs()
545545
ret void
@@ -555,8 +555,8 @@ attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
555555
; GFX10WGP-WAVE64: NumVgprs: 256
556556
; GFX10CU-WAVE32: NumVgprs: 128
557557
; GFX10CU-WAVE64: NumVgprs: 128
558-
; GFX11WGP-WAVE32: NumVgprs: 256
559-
; GFX11WGP-WAVE64: NumVgprs: 256
558+
; GFX11WGP-WAVE32: NumVgprs: 128
559+
; GFX11WGP-WAVE64: NumVgprs: 128
560560
; GFX11CU-WAVE32: NumVgprs: 128
561561
; GFX11CU-WAVE64: NumVgprs: 128
562562
define amdgpu_kernel void @f512() #512 {

0 commit comments

Comments
 (0)