|
| 1 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s |
| 2 | +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefix=MESA %s |
| 3 | + |
| 4 | +; GFX1250 supports up to 320 KB of configurable LDS memory. |
| 5 | +; This test checks the min and max size of LDS that can be allocated. |
| 6 | + |
; LDS (addrspace(3)) globals referenced by the kernels in this file. Each
; scalar is paired with the array of the same element type, so that
; array bytes + scalar bytes add up to 327680 (320 * 1024) in every case:
;   i8  : 327679 * 1 + 1
;   i16 : 163839 * 2 + 2
;   i32 :  81919 * 4 + 4
; The initializers are poison rather than undef, since undef is deprecated
; in new LLVM tests.
@lds.i8 = addrspace(3) global i8 poison
@lds.array.i8 = addrspace(3) global [327679 x i8] poison
@lds.i16 = addrspace(3) global i16 poison
@lds.array.i16 = addrspace(3) global [163839 x i16] poison
@lds.i32 = addrspace(3) global i32 poison
@lds.array.i32 = addrspace(3) global [81919 x i32] poison
| 13 | + |
; Minimum-size case for i8: the kernel only touches the one-byte LDS global
; @lds.i8, so the reported LDS usage is expected to be 1 byte.
; The mesa3d checks are anchored with their own label so the
; granulated_lds_size match cannot drift to another kernel's output.
; GCN-LABEL: test_lds_i8:
; GCN: .amdhsa_group_segment_fixed_size 1
; GCN: ; LDSByteSize: 1 bytes/workgroup
; MESA-LABEL: test_lds_i8:
; MESA: granulated_lds_size = 1
define amdgpu_kernel void @test_lds_i8(i8 %val) {
  ; Write the kernel argument to LDS so @lds.i8 is used by this kernel.
  store i8 %val, ptr addrspace(3) @lds.i8
  ret void
}
| 22 | + |
; Minimum-size case for i16: the kernel only touches the two-byte LDS global
; @lds.i16, so the reported LDS usage is expected to be 2 bytes.
; The mesa3d checks are anchored with their own label so the
; granulated_lds_size match cannot drift to another kernel's output.
; GCN-LABEL: test_lds_i16:
; GCN: .amdhsa_group_segment_fixed_size 2
; GCN: ; LDSByteSize: 2 bytes/workgroup
; MESA-LABEL: test_lds_i16:
; MESA: granulated_lds_size = 1
define amdgpu_kernel void @test_lds_i16(i16 %val) {
  ; Write the kernel argument to LDS so @lds.i16 is used by this kernel.
  store i16 %val, ptr addrspace(3) @lds.i16
  ret void
}
| 31 | + |
; Minimum-size case for i32: the kernel only touches the four-byte LDS global
; @lds.i32, so the reported LDS usage is expected to be 4 bytes.
; The mesa3d checks are anchored with their own label so the
; granulated_lds_size match cannot drift to another kernel's output.
; GCN-LABEL: test_lds_i32:
; GCN: .amdhsa_group_segment_fixed_size 4
; GCN: ; LDSByteSize: 4 bytes/workgroup
; MESA-LABEL: test_lds_i32:
; MESA: granulated_lds_size = 1
define amdgpu_kernel void @test_lds_i32(i32 %val) {
  ; Write the kernel argument to LDS so @lds.i32 is used by this kernel.
  store i32 %val, ptr addrspace(3) @lds.i32
  ret void
}
| 40 | + |
; Maximum-size case for i8: the kernel references both @lds.array.i8
; (327679 bytes) and @lds.i8 (1 byte), for an expected total of 327680 bytes.
; The mesa3d checks are anchored with their own label so the
; granulated_lds_size match cannot drift to another kernel's output.
; GCN-LABEL: test_lds_array_i8:
; GCN: .amdhsa_group_segment_fixed_size 327680
; GCN: ; LDSByteSize: 327680 bytes/workgroup
; MESA-LABEL: test_lds_array_i8:
; MESA: granulated_lds_size = 320
define amdgpu_kernel void @test_lds_array_i8() {
  ; Read element 5 of the array and copy it to the scalar, so both LDS
  ; globals are used by this kernel.
  %gep = getelementptr inbounds [327679 x i8], ptr addrspace(3) @lds.array.i8, i32 0, i32 5
  %val = load i8, ptr addrspace(3) %gep
  store i8 %val, ptr addrspace(3) @lds.i8
  ret void
}
| 51 | + |
; Maximum-size case for i16: the kernel references both @lds.array.i16
; (163839 * 2 bytes) and @lds.i16 (2 bytes), for an expected total of
; 327680 bytes.
; The mesa3d checks are anchored with their own label so the
; granulated_lds_size match cannot drift to another kernel's output.
; GCN-LABEL: test_lds_array_i16:
; GCN: .amdhsa_group_segment_fixed_size 327680
; GCN: ; LDSByteSize: 327680 bytes/workgroup
; MESA-LABEL: test_lds_array_i16:
; MESA: granulated_lds_size = 320
define amdgpu_kernel void @test_lds_array_i16() {
  ; Read element 10 of the array and copy it to the scalar, so both LDS
  ; globals are used by this kernel.
  %gep = getelementptr inbounds [163839 x i16], ptr addrspace(3) @lds.array.i16, i32 0, i32 10
  %val = load i16, ptr addrspace(3) %gep
  store i16 %val, ptr addrspace(3) @lds.i16
  ret void
}
| 62 | + |
; Maximum-size case for i32: the kernel references both @lds.array.i32
; (81919 * 4 bytes) and @lds.i32 (4 bytes), for an expected total of
; 327680 bytes.
; The mesa3d checks are anchored with their own label so the
; granulated_lds_size match cannot drift to another kernel's output.
; GCN-LABEL: test_lds_array_i32:
; GCN: .amdhsa_group_segment_fixed_size 327680
; GCN: ; LDSByteSize: 327680 bytes/workgroup
; MESA-LABEL: test_lds_array_i32:
; MESA: granulated_lds_size = 320
define amdgpu_kernel void @test_lds_array_i32() {
  ; Read element 20 of the array and copy it to the scalar, so both LDS
  ; globals are used by this kernel.
  %gep = getelementptr inbounds [81919 x i32], ptr addrspace(3) @lds.array.i32, i32 0, i32 20
  %val = load i32, ptr addrspace(3) %gep
  store i32 %val, ptr addrspace(3) @lds.i32
  ret void
}
0 commit comments