-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[NFC][AMDGPU][GISel] Precommit GlobalISel specific tests for call instruction #165898
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel Author: Chinmay Deshpande (chinmaydd) Changes: Full diff: https://github.com/llvm/llvm-project/pull/165898.diff 1 File Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
new file mode 100644
index 0000000000000..054bc9df0ee60
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+declare hidden void @external_void_func_void() #0
+declare hidden void @external_void_func_i32(i32) #0
+declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
+declare hidden i32 @external_i32_func_void() #0
+
+declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
+
+declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
+declare hidden amdgpu_gfx i32 @external_gfx_i32_func_void() #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0
+
+define amdgpu_kernel void @test_call_external_void_func_void() #0 {
+; GFX9-LABEL: test_call_external_void_func_void:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT: s_add_u32 s0, s0, s17
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_void@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_void@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: s_endpgm
+ call void @external_void_func_void()
+ ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_i32_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT: s_add_u32 s0, s0, s17
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: s_add_u32 s8, s8, 8
+; GFX9-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32@rel32@hi+12
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: s_endpgm
+ call void @external_void_func_i32(i32 42)
+ ret void
+}
+
+define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
+; GFX9-LABEL: test_call_external_i32_func_void:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT: s_add_u32 s0, s0, s17
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_i32_func_void@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_i32_func_void@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: global_store_dword v[0:1], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_endpgm
+ %val = call i32 @external_i32_func_void()
+ store volatile i32 %val, ptr addrspace(1) poison
+ ret void
+}
+
+define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
+; GFX9-LABEL: test_call_external_void_func_i32_inreg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s17, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-NEXT: v_writelane_b32 v40, s17, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s0, s16
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s4, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b32 s33, s4
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ call void @external_void_func_i32_inreg(i32 inreg %arg)
+ ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT: s_add_u32 s0, s0, s17
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: v_mov_b32_e32 v3, 3
+; GFX9-NEXT: buffer_store_byte v3, off, s[0:3], 0
+; GFX9-NEXT: v_mov_b32_e32 v3, 8
+; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], 0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:4
+; GFX9-NEXT: s_add_u32 s8, s8, 8
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT: v_mov_b32_e32 v0, 8
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:4
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:8
+; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_byte v[0:1], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dword v[0:1], v1, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_endpgm
+ %in.val = alloca { i8, i32 }, align 4, addrspace(5)
+ %out.val = alloca { i8, i32 }, align 4, addrspace(5)
+ %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
+ %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
+ store i8 3, ptr addrspace(5) %in.gep0
+ store i32 8, ptr addrspace(5) %in.gep1
+ call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) %in.val)
+ %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
+ %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
+ %out.val0 = load i8, ptr addrspace(5) %out.gep0
+ %out.val1 = load i32, ptr addrspace(5) %out.gep1
+ store volatile i8 %out.val0, ptr addrspace(1) poison
+ store volatile i32 %out.val1, ptr addrspace(1) poison
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_void:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_void@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_void@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ call amdgpu_gfx void @external_gfx_void_func_void()
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_i32@rel32@hi+12
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
+; GFX9-LABEL: test_gfx_call_external_i32_func_void:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_i32_func_void@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_i32_func_void@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: global_store_dword v[0:1], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
+ store volatile i32 %val, ptr addrspace(1) poison
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm_inreg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 3
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s4, 42
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35]
+; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
+ %val = load { i8, i32 }, ptr addrspace(1) %ptr0
+ call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32_inreg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 4
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_ubyte v1, v0, s[34:35]
+; GFX9-NEXT: global_load_dword v2, v0, s[34:35] offset:4
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32_inreg@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: v_readfirstlane_b32 s4, v1
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_readfirstlane_b32 s5, v2
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
+ %val = load { i8, i32 }, ptr addrspace(1) %ptr0
+ call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind noinline }
|
|
@petar-avramovic I wonder if we need to cover more ground on the gfx side of things. |
| @@ -0,0 +1,398 @@ | |||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |||
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you just add run lines to one of the existing tests instead of copying it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can; however, in local testing this adds roughly 3-5 seconds to the overall test time. Do we draw a line on what would count as a prohibitive addition?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm Also, bfloat16 is currently not supported in GlobalISel, so adding run lines to existing test files such as call-args-inreg.ll doesn't fit cleanly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can either split the test or enable the fallback.
| @@ -0,0 +1,127 @@ | |||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Splitting this into a new file as GISel does not support the bfloat type
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add a comment to describe the situation here.
28a24c9 to
27791b9
Compare
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s | ||
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s | ||
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Holding out on adding gfx11 test until #166501 is resolved.
27791b9 to
4a17c23
Compare
|
@arsenm could you look at this one again when you get time? Thanks! |
No description provided.