Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5598,6 +5598,8 @@ The fields used by CP for code objects before V3 also match those specified in
roundup(lds-size / (128 * 4))
GFX950
roundup(lds-size / (320 * 4))
GFX125*
roundup(lds-size / (256 * 4))

24 1 bit ENABLE_EXCEPTION_IEEE_754_FP Wavefront starts execution
_INVALID_OPERATION with specified exceptions
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1548,7 +1548,7 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",

def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
"gfx12",
[FeatureFP64, FeatureAddressableLocalMemorySize65536, FeatureMIMG_R128,
[FeatureFP64, FeatureMIMG_R128,
FeatureFlatAddressSpace, Feature16BitInsts,
FeatureInv2PiInlineImm, FeatureApertureRegs,
FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts,
Expand Down Expand Up @@ -1977,6 +1977,7 @@ def FeatureISAVersion11_5_3 : FeatureSet<

def FeatureISAVersion12 : FeatureSet<
[FeatureGFX12,
FeatureAddressableLocalMemorySize65536,
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureDot7Insts,
Expand Down Expand Up @@ -2019,6 +2020,7 @@ def FeatureISAVersion12_50 : FeatureSet<
[FeatureGFX12,
FeatureGFX1250Insts,
FeatureCUStores,
FeatureAddressableLocalMemorySize327680,
FeatureCuMode,
Feature64BitLiterals,
FeatureLDSBankCount32,
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.DX10Clamp = Mode.DX10Clamp;

unsigned LDSAlignShift;
if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) {
if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize327680)) {
// LDS is allocated in 256 dword blocks.
LDSAlignShift = 10;
} else if (STM.getFeatureBits().test(
FeatureAddressableLocalMemorySize163840)) {
// LDS is allocated in 320 dword blocks.
LDSAlignShift = 11;
} else if (STM.getFeatureBits().test(
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class SubtargetFeatureAddressableLocalMemorySize <int Value> : SubtargetFeature<
def FeatureAddressableLocalMemorySize32768 : SubtargetFeatureAddressableLocalMemorySize<32768>;
def FeatureAddressableLocalMemorySize65536 : SubtargetFeatureAddressableLocalMemorySize<65536>;
def FeatureAddressableLocalMemorySize163840 : SubtargetFeatureAddressableLocalMemorySize<163840>;
def FeatureAddressableLocalMemorySize327680 : SubtargetFeatureAddressableLocalMemorySize<327680>;

class SubtargetFeatureWavefrontSize <int ValueLog2> : SubtargetFeature<
"wavefrontsize"#!shl(1, ValueLog2),
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,8 @@ unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
return 65536;
if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
return 163840;
if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
return 327680;
return 0;
}

Expand Down Expand Up @@ -3340,8 +3342,8 @@ bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST) {
}

/// Returns the LDS allocation granularity for \p ST, in dwords.
///
/// Subtargets with FeatureAddressableLocalMemorySize327680 allocate LDS in
/// 256-dword blocks; every other subtarget uses 128-dword blocks.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  return ST.hasFeature(AMDGPU::FeatureAddressableLocalMemorySize327680) ? 256
                                                                        : 128;
}

bool isPackedFP32Inst(unsigned Opc) {
Expand Down
7 changes: 7 additions & 0 deletions llvm/test/CodeGen/AMDGPU/extra-lds-size.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx950 < %s | FileCheck -check-prefix=GFX950-MESA %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200-PAL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200-MESA %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-PAL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-MESA %s

; Check EXTRA_LDS_SIZE in SPI_SHADER_PGM_RSRC2_PS.

Expand All @@ -29,6 +31,11 @@
; GFX1200-MESA: .long 45100
; GFX1200-MESA-NEXT: .long 1024

; GFX1250-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x200

; GFX1250-MESA: .long 45100
; GFX1250-MESA-NEXT: .long 512

@lds = internal addrspace(3) global [4096 x i8] poison

define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset) {
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx1250.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s 2>&1 | FileCheck -check-prefix=ERROR %s

; GFX1250 supports up to 320 KB LDS memory.
; This is a negative test to check when the LDS size exceeds the max usable limit.

; ERROR: error: <unknown>:0:0: local memory (327684) exceeds limit (327680) in function 'test_lds_limit'
; 81921 x i32 is 327684 bytes, i.e. 4 bytes more than the 327680-byte limit.
@dst = addrspace(3) global [81921 x i32] poison

; Kernel that stores its i32 argument to element 100 of @dst, so that @dst
; (accessed here as [81921 x i32], 327684 bytes) counts as used LDS.
define amdgpu_kernel void @test_lds_limit(i32 %val) {
%gep = getelementptr [81921 x i32], ptr addrspace(3) @dst, i32 0, i32 100
store i32 %val, ptr addrspace(3) %gep
ret void
}
72 changes: 72 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx1250.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefix=MESA %s

; GFX1250 supports up to 320 KB configurable LDS memory.
; This test checks the min and max size of LDS that can be allocated.

; One scalar and one array per element type. Each array is sized so that the
; array plus its scalar total exactly 327680 bytes:
;   327679 x i8  + i8  = 327679 + 1
;   163839 x i16 + i16 = 327678 + 2
;   81919  x i32 + i32 = 327676 + 4
@lds.i8 = addrspace(3) global i8 poison
@lds.array.i8 = addrspace(3) global [327679 x i8] poison
@lds.i16 = addrspace(3) global i16 poison
@lds.array.i16 = addrspace(3) global [163839 x i16] poison
@lds.i32 = addrspace(3) global i32 poison
@lds.array.i32 = addrspace(3) global [81919 x i32] poison

; GCN-LABEL: test_lds_i8:
; GCN: .amdhsa_group_segment_fixed_size 1
; GCN: ; LDSByteSize: 1 bytes/workgroup
; MESA: granulated_lds_size = 1
; Minimum-size case: the kernel stores its i8 argument to @lds.i8, the only
; LDS global it references.
define amdgpu_kernel void @test_lds_i8(i8 %val) {
store i8 %val, ptr addrspace(3) @lds.i8
ret void
}

; GCN-LABEL: test_lds_i16:
; GCN: .amdhsa_group_segment_fixed_size 2
; GCN: ; LDSByteSize: 2 bytes/workgroup
; MESA: granulated_lds_size = 1
; The kernel stores its i16 argument to @lds.i16, the only LDS global it
; references.
define amdgpu_kernel void @test_lds_i16(i16 %val) {
store i16 %val, ptr addrspace(3) @lds.i16
ret void
}

; GCN-LABEL: test_lds_i32:
; GCN: .amdhsa_group_segment_fixed_size 4
; GCN: ; LDSByteSize: 4 bytes/workgroup
; MESA: granulated_lds_size = 1
; The kernel stores its i32 argument to @lds.i32, the only LDS global it
; references.
define amdgpu_kernel void @test_lds_i32(i32 %val) {
store i32 %val, ptr addrspace(3) @lds.i32
ret void
}

; GCN-LABEL: test_lds_array_i8:
; GCN: .amdhsa_group_segment_fixed_size 327680
; GCN: ; LDSByteSize: 327680 bytes/workgroup
; MESA: granulated_lds_size = 320
; Maximum-size case: copies element 5 of @lds.array.i8 into @lds.i8, so both
; globals are referenced (327679 bytes for the array plus 1 byte for the scalar).
define amdgpu_kernel void @test_lds_array_i8() {
%gep = getelementptr inbounds [327679 x i8], ptr addrspace(3) @lds.array.i8, i32 0, i32 5
%val = load i8, ptr addrspace(3) %gep
store i8 %val, ptr addrspace(3) @lds.i8
ret void
}

; GCN-LABEL: test_lds_array_i16:
; GCN: .amdhsa_group_segment_fixed_size 327680
; GCN: ; LDSByteSize: 327680 bytes/workgroup
; MESA: granulated_lds_size = 320
; Maximum-size case: copies element 10 of @lds.array.i16 into @lds.i16, so both
; globals are referenced (163839 x i16 = 327678 bytes plus 2 bytes for the scalar).
define amdgpu_kernel void @test_lds_array_i16() {
%gep = getelementptr inbounds [163839 x i16], ptr addrspace(3) @lds.array.i16, i32 0, i32 10
%val = load i16, ptr addrspace(3) %gep
store i16 %val, ptr addrspace(3) @lds.i16
ret void
}

; GCN-LABEL: test_lds_array_i32:
; GCN: .amdhsa_group_segment_fixed_size 327680
; GCN: ; LDSByteSize: 327680 bytes/workgroup
; MESA: granulated_lds_size = 320
; Maximum-size case: copies element 20 of @lds.array.i32 into @lds.i32, so both
; globals are referenced (81919 x i32 = 327676 bytes plus 4 bytes for the scalar).
define amdgpu_kernel void @test_lds_array_i32() {
%gep = getelementptr inbounds [81919 x i32], ptr addrspace(3) @lds.array.i32, i32 0, i32 20
%val = load i32, ptr addrspace(3) %gep
store i32 %val, ptr addrspace(3) @lds.i32
ret void
}
61 changes: 61 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lds-size-pal-gfx1250.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=PAL %s

; GFX1250 supports up to 320 KB configurable LDS memory.
; This test checks the min and max size of LDS that can be allocated.

; PAL: .shader_functions:
; PAL: test_lds_array_i16:
; PAL: .lds_size: 0x50000
; PAL: test_lds_array_i32:
; PAL: .lds_size: 0x50000
; PAL: test_lds_array_i8:
; PAL: .lds_size: 0x50000
; PAL: test_lds_i16:
; PAL: .lds_size: 0x2
; PAL: test_lds_i32:
; PAL: .lds_size: 0x4
; PAL: test_lds_i8:
; PAL: .lds_size: 0x1

; One scalar and one array per element type. Each array is sized so that the
; array plus its scalar total exactly 327680 (0x50000) bytes:
;   327679 x i8  + i8  = 327679 + 1
;   163839 x i16 + i16 = 327678 + 2
;   81919  x i32 + i32 = 327676 + 4
@lds.i8 = addrspace(3) global i8 poison
@lds.array.i8 = addrspace(3) global [327679 x i8] poison
@lds.i16 = addrspace(3) global i16 poison
@lds.array.i16 = addrspace(3) global [163839 x i16] poison
@lds.i32 = addrspace(3) global i32 poison
@lds.array.i32 = addrspace(3) global [81919 x i32] poison

; Minimum-size case: an amdgpu_gfx function that stores its i8 argument to
; @lds.i8, the only LDS global it references.
define amdgpu_gfx void @test_lds_i8(i8 %val) {
store i8 %val, ptr addrspace(3) @lds.i8
ret void
}

; An amdgpu_gfx function that stores its i16 argument to @lds.i16, the only
; LDS global it references.
define amdgpu_gfx void @test_lds_i16(i16 %val) {
store i16 %val, ptr addrspace(3) @lds.i16
ret void
}

; An amdgpu_gfx function that stores its i32 argument to @lds.i32, the only
; LDS global it references.
define amdgpu_gfx void @test_lds_i32(i32 %val) {
store i32 %val, ptr addrspace(3) @lds.i32
ret void
}

; Maximum-size case: copies element 5 of @lds.array.i8 into @lds.i8, so both
; globals are referenced (327679 bytes for the array plus 1 byte for the scalar).
define amdgpu_gfx void @test_lds_array_i8() {
%gep = getelementptr inbounds [327679 x i8], ptr addrspace(3) @lds.array.i8, i32 0, i32 5
%val = load i8, ptr addrspace(3) %gep
store i8 %val, ptr addrspace(3) @lds.i8
ret void
}

; Maximum-size case: copies element 10 of @lds.array.i16 into @lds.i16, so both
; globals are referenced (163839 x i16 = 327678 bytes plus 2 bytes for the scalar).
define amdgpu_gfx void @test_lds_array_i16() {
%gep = getelementptr inbounds [163839 x i16], ptr addrspace(3) @lds.array.i16, i32 0, i32 10
%val = load i16, ptr addrspace(3) %gep
store i16 %val, ptr addrspace(3) @lds.i16
ret void
}

; Maximum-size case: copies element 20 of @lds.array.i32 into @lds.i32, so both
; globals are referenced (81919 x i32 = 327676 bytes plus 4 bytes for the scalar).
define amdgpu_gfx void @test_lds_array_i32() {
%gep = getelementptr inbounds [81919 x i32], ptr addrspace(3) @lds.array.i32, i32 0, i32 20
%val = load i32, ptr addrspace(3) %gep
store i32 %val, ptr addrspace(3) @lds.i32
ret void
}