@@ -594,4 +594,36 @@ define amdgpu_kernel void @preloadremainder_xyz(ptr addrspace(1) inreg %out) #0
594594 ret void
595595}
596596
; Test kernel: reads a 16-bit field at byte offset 22 from the implicit-argument
; pointer, zero-extends it to i32 and stores it through %out.
; Unlike @preloadremainder_xyz above, this definition carries no attribute group
; reference, so none of the "amdgpu-no-*" attributes apply to it.
; NOTE(review): judging by the kernel name, the intent is that no SGPRs are left
; over for preloading the remainder_z implicit argument, so it has to be fetched
; with an ordinary scalar load - confirm against the kernarg-preload lowering.
; The expected output for both targets loads the dword at kernarg offset 0x1c and
; shifts it right by 16 to obtain the 16-bit value; the targets differ in the SGPR
; pairs used for the load base and the store address, and in the sc0 sc1 store
; modifiers.
; NOTE(review): 0x1c presumably corresponds to the 8-byte %out pointer plus
; implicit-arg byte 20, i.e. the dword that contains bytes 22..23 - confirm.
597+ define amdgpu_kernel void @no_free_sgprs_preloadremainder_z (ptr addrspace (1 ) inreg %out ) {
598+ ; GFX940-LABEL: no_free_sgprs_preloadremainder_z:
599+ ; GFX940: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
600+ ; GFX940-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
601+ ; GFX940-NEXT: ; %bb.0:
602+ ; GFX940-NEXT: s_load_dword s0, s[2:3], 0x1c
603+ ; GFX940-NEXT: v_mov_b32_e32 v0, 0
604+ ; GFX940-NEXT: s_waitcnt lgkmcnt(0)
605+ ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
606+ ; GFX940-NEXT: v_mov_b32_e32 v1, s0
607+ ; GFX940-NEXT: global_store_dword v0, v1, s[6:7] sc0 sc1
608+ ; GFX940-NEXT: s_endpgm
609+ ;
610+ ; GFX90a-LABEL: no_free_sgprs_preloadremainder_z:
611+ ; GFX90a: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
612+ ; GFX90a-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
613+ ; GFX90a-NEXT: ; %bb.0:
614+ ; GFX90a-NEXT: s_load_dword s0, s[6:7], 0x1c
615+ ; GFX90a-NEXT: v_mov_b32_e32 v0, 0
616+ ; GFX90a-NEXT: s_waitcnt lgkmcnt(0)
617+ ; GFX90a-NEXT: s_lshr_b32 s0, s0, 16
618+ ; GFX90a-NEXT: v_mov_b32_e32 v1, s0
619+ ; GFX90a-NEXT: global_store_dword v0, v1, s[10:11]
620+ ; GFX90a-NEXT: s_endpgm
; IR under test: take the implicit-argument base pointer (address space 4),
; step 22 bytes into it, load an i16 from there, widen it with zext and store
; the resulting i32 to the address-space-1 pointer %out.
621+ %imp_arg_ptr = call ptr addrspace (4 ) @llvm.amdgcn.implicitarg.ptr ()
622+ %gep = getelementptr i8 , ptr addrspace (4 ) %imp_arg_ptr , i32 22
623+ %load = load i16 , ptr addrspace (4 ) %gep
624+ %conv = zext i16 %load to i32
625+ store i32 %conv , ptr addrspace (1 ) %out
626+ ret void
627+ }
628+
; Attribute group #0, referenced by @preloadremainder_xyz above. Apart from
; "uniform-work-group-size"="false", every entry is an "amdgpu-no-*" string attribute.
; NOTE(review): these presumably mark the named implicit inputs as unused by the
; kernel - confirm against the AMDGPU backend documentation.
597629attributes #0 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" ="false" }
0 commit comments