Skip to content

Commit a26c3e9

Browse files
authored
[AMDGPU] User SGPR count increased to 32 on gfx1250 (#154205)
1 parent a4cff34 commit a26c3e9

File tree

3 files changed

+11
-11
lines changed

3 files changed

+11
-11
lines changed

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2417,7 +2417,11 @@ unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
24172417
return 0;
24182418
}
24192419

2420-
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2420+
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) {
2421+
if (isGFX1250(STI))
2422+
return 32;
2423+
return 16;
2424+
}
24212425

24222426
bool isSI(const MCSubtargetInfo &STI) {
24232427
return STI.hasFeature(AMDGPU::FeatureSouthernIslands);

llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -117,9 +117,7 @@ define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) inreg %o
117117
;
118118
; GFX1250-LABEL: no_free_sgprs_block_count_x:
119119
; GFX1250: ; %bb.0:
120-
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x28
121-
; GFX1250-NEXT: s_wait_kmcnt 0x0
122-
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
120+
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s18
123121
; GFX1250-NEXT: global_store_b32 v0, v1, s[8:9]
124122
; GFX1250-NEXT: s_endpgm
125123
%imp_arg_ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()

llvm/test/CodeGen/AMDGPU/preload-kernargs.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -471,13 +471,11 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture inreg %out, <8 x
471471
;
472472
; GFX1250-LABEL: v8i32_arg:
473473
; GFX1250: ; %bb.0:
474-
; GFX1250-NEXT: s_load_b256 s[4:11], s[0:1], 0x20
475-
; GFX1250-NEXT: s_wait_kmcnt 0x0
476-
; GFX1250-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v0, s8
477-
; GFX1250-NEXT: v_dual_mov_b32 v1, s9 :: v_dual_mov_b32 v2, s10
478-
; GFX1250-NEXT: v_dual_mov_b32 v3, s11 :: v_dual_mov_b32 v4, s4
479-
; GFX1250-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6
480-
; GFX1250-NEXT: v_mov_b32_e32 v7, s7
474+
; GFX1250-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v0, s14
475+
; GFX1250-NEXT: v_dual_mov_b32 v1, s15 :: v_dual_mov_b32 v2, s16
476+
; GFX1250-NEXT: v_dual_mov_b32 v3, s17 :: v_dual_mov_b32 v4, s10
477+
; GFX1250-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v6, s12
478+
; GFX1250-NEXT: v_mov_b32_e32 v7, s13
481479
; GFX1250-NEXT: s_clause 0x1
482480
; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[2:3] offset:16
483481
; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[2:3]

0 commit comments

Comments
 (0)