Skip to content

Commit 97f6ccd

Browse files
committed
[AMDGPU] update LDS block size for gfx1250
Should be 2048 bytes (512 dwords) based on current spec.
1 parent ea10026 commit 97f6ccd

File tree

5 files changed

+10
-13
lines changed

5 files changed

+10
-13
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5855,7 +5855,7 @@ The fields used by CP for code objects before V3 also match those specified in
58555855
GFX950
58565856
roundup(lds-size / (320 * 4))
58575857
GFX125*
5858-
roundup(lds-size / (256 * 4))
5858+
roundup(lds-size / (512 * 4))
58595859

58605860
24 1 bit ENABLE_EXCEPTION_IEEE_754_FP Wavefront starts execution
58615861
_INVALID_OPERATION with specified exceptions

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,12 +1161,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
11611161
ProgInfo.DX10Clamp = Mode.DX10Clamp;
11621162

11631163
unsigned LDSAlignShift;
1164-
if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize327680)) {
1165-
// LDS is allocated in 256 dword blocks.
1166-
LDSAlignShift = 10;
1167-
} else if (STM.getFeatureBits().test(
1168-
FeatureAddressableLocalMemorySize163840)) {
1169-
// LDS is allocated in 320 dword blocks.
1164+
if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize327680) ||
1165+
STM.getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) {
1166+
// LDS is allocated in 512 or 320 dword blocks.
11701167
LDSAlignShift = 11;
11711168
} else if (STM.getFeatureBits().test(
11721169
FeatureAddressableLocalMemorySize65536)) {

llvm/test/CodeGen/AMDGPU/extra-lds-size.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@
3131
; GFX1200-MESA: .long 45100
3232
; GFX1200-MESA-NEXT: .long 1024
3333

34-
; GFX1250-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x200
34+
; GFX1250-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x100
3535

3636
; GFX1250-MESA: .long 45100
37-
; GFX1250-MESA-NEXT: .long 512
37+
; GFX1250-MESA-NEXT: .long 256
3838

3939
@lds = internal addrspace(3) global [4096 x i8] poison
4040

llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx1250.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ define amdgpu_kernel void @test_lds_i32(i32 %val) {
4141
; GCN-LABEL: test_lds_array_i8:
4242
; GCN: .amdhsa_group_segment_fixed_size 327680
4343
; GCN: ; LDSByteSize: 327680 bytes/workgroup
44-
; MESA: granulated_lds_size = 320
44+
; MESA: granulated_lds_size = 160
4545
define amdgpu_kernel void @test_lds_array_i8() {
4646
%gep = getelementptr inbounds [327679 x i8], ptr addrspace(3) @lds.array.i8, i32 0, i32 5
4747
%val = load i8, ptr addrspace(3) %gep
@@ -52,7 +52,7 @@ define amdgpu_kernel void @test_lds_array_i8() {
5252
; GCN-LABEL: test_lds_array_i16:
5353
; GCN: .amdhsa_group_segment_fixed_size 327680
5454
; GCN: ; LDSByteSize: 327680 bytes/workgroup
55-
; MESA: granulated_lds_size = 320
55+
; MESA: granulated_lds_size = 160
5656
define amdgpu_kernel void @test_lds_array_i16() {
5757
%gep = getelementptr inbounds [163839 x i16], ptr addrspace(3) @lds.array.i16, i32 0, i32 10
5858
%val = load i16, ptr addrspace(3) %gep
@@ -63,7 +63,7 @@ define amdgpu_kernel void @test_lds_array_i16() {
6363
; GCN-LABEL: test_lds_array_i32:
6464
; GCN: .amdhsa_group_segment_fixed_size 327680
6565
; GCN: ; LDSByteSize: 327680 bytes/workgroup
66-
; MESA: granulated_lds_size = 320
66+
; MESA: granulated_lds_size = 160
6767
define amdgpu_kernel void @test_lds_array_i32() {
6868
%gep = getelementptr inbounds [81919 x i32], ptr addrspace(3) @lds.array.i32, i32 0, i32 20
6969
%val = load i32, ptr addrspace(3) %gep

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx1250.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@
126126
; CHECK-NEXT: .entry_point: _amdgpu_hs
127127
; CHECK-NEXT: .entry_point_symbol: hs_shader
128128
; CHECK-NEXT: .forward_progress: true
129-
; CHECK-NEXT: .lds_size: 0x1000
129+
; CHECK-NEXT: .lds_size: 0x800
130130
; CHECK-NEXT: .mem_ordered: true
131131
; CHECK-NEXT: .scratch_en: false
132132
; CHECK-NEXT: .scratch_memory_size: 0

0 commit comments

Comments
 (0)