diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll new file mode 100644 index 0000000000000..04e472419ca61 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll @@ -0,0 +1,130 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s + +; We've separated this file from call-args-inreg.ll since GlobalISel does not support the bfloat type. +; Ideally, we should merge the two files once that support lands. + +declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0 +declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0 + +define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { +; GFX9-LABEL: test_call_external_void_func_bf16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s17, s33 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-NEXT: v_writelane_b32 v40, s17, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_getpc_b64 s[18:19] +; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12 +; GFX9-NEXT: s_mov_b32 s0, s16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_mov_b32 s32, s33 +; GFX9-NEXT: v_readlane_b32 s4, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b32 s33, s4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_call_external_void_func_bf16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s1, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s2, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: s_getpc_b64 s[2:3] +; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4 +; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + call void @external_void_func_bf16_inreg(bfloat inreg %arg) + ret void +} + +define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 { +; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s17, s33 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-NEXT: v_writelane_b32 v40, s17, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_getpc_b64 s[18:19] +; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12 +; GFX9-NEXT: s_mov_b32 s0, s16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_mov_b32 s32, s33 +; GFX9-NEXT: v_readlane_b32 s4, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b32 s33, s4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s1, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s2, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: s_getpc_b64 s[2:3] +; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4 +; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) + ret void +} + diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll index d1cede64ce71d..f96007ae513bd 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GISEL %s declare hidden void @external_void_func_i8_inreg(i8 inreg) #0 declare hidden void @external_void_func_i16_inreg(i32 inreg) #0 @@ -12,11 +14,9 @@ declare hidden void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0 declare hidden void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0 declare hidden void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0 declare hidden void @external_void_func_f16_inreg(half inreg) #0 -declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0 declare hidden void @external_void_func_f32_inreg(float inreg) #0 declare hidden void @external_void_func_f64_inreg(double inreg) #0 declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) #0 -declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0 declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) #0 declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) #0 @@ -212,35 +212,6 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { } define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_i64_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s18, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_i64_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -273,35 +244,6 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { } define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v2i32_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s18, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_v2i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -334,36 +276,6 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { } define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v3i32_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s19, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s19, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[20:21] -; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s2, s18 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_v3i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -396,37 +308,6 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 { } define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v4i32_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s20, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-NEXT: v_writelane_b32 v40, s20, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[20:21] -; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s3, s19 -; GFX9-NEXT: s_mov_b32 s2, s18 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_v4i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -459,41 +340,6 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { } define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v8i32_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s24, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[26:27] -; GFX9-NEXT: v_writelane_b32 v40, s24, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[24:25] -; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s3, s19 -; GFX9-NEXT: s_mov_b32 s2, s18 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: s_mov_b32 s16, s20 -; GFX9-NEXT: s_mov_b32 s17, s21 -; GFX9-NEXT: s_mov_b32 s18, s22 -; GFX9-NEXT: s_mov_b32 s19, s23 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[24:25] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_v8i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -585,66 +431,6 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ret void } -define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_bf16_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s17, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: test_call_external_void_func_bf16_inreg: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s1, s33 -; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_or_saveexec_b32 s2, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s2 -; GFX11-NEXT: v_writelane_b32 v40, s1, 2 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[2:3] -; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-NEXT: s_mov_b32 s32, s33 -; GFX11-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: s_mov_b32 s33, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_setpc_b64 s[30:31] - call void @external_void_func_bf16_inreg(bfloat inreg %arg) - ret void -} - define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; GFX9-LABEL: test_call_external_void_func_f32_inreg: ; GFX9: ; %bb.0: @@ -706,35 +492,6 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { } define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_f64_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s18, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_f64_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -826,97 +583,7 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ret void } - -define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s17, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s1, s33 -; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_or_saveexec_b32 s2, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s2 -; GFX11-NEXT: v_writelane_b32 v40, s1, 2 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[2:3] -; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-NEXT: s_mov_b32 s32, s33 -; GFX11-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: s_mov_b32 s33, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_setpc_b64 s[30:31] - call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) - ret void -} - define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v3f16_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s18, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_v3f16_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -949,35 +616,6 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 } define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v4f16_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s18, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v4f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v4f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_v4f16_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1010,35 +648,6 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 } define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_p0_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s18, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p0_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p0_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_p0_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1071,35 +680,6 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { } define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_p1_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s18, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p1_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p1_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_p1_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1192,37 +772,6 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) } define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v2p1_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s20, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-NEXT: v_writelane_b32 v40, s20, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[20:21] -; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s3, s19 -; GFX9-NEXT: s_mov_b32 s2, s18 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_v2p1_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1255,35 +804,6 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre } define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) #0 { -; GFX9-LABEL: test_call_external_void_func_v2p5_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s18, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[18:19] -; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2p5_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2p5_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_v2p5_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1316,38 +836,6 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre } define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2) #0 { -; GFX9-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s21, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-NEXT: v_writelane_b32 v40, s21, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[22:23] -; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s3, s19 -; GFX9-NEXT: s_mov_b32 s2, s18 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: s_mov_b32 s16, s20 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1380,46 +868,6 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre } define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #0 { -; GFX9-LABEL: test_call_external_void_func_a15i32_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s29, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[40:41] -; GFX9-NEXT: v_writelane_b32 v40, s29, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[40:41] -; GFX9-NEXT: s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s3, s19 -; GFX9-NEXT: s_mov_b32 s2, s18 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: s_mov_b32 s16, s20 -; GFX9-NEXT: s_mov_b32 s17, s21 -; GFX9-NEXT: s_mov_b32 s18, s22 -; GFX9-NEXT: s_mov_b32 s19, s23 -; GFX9-NEXT: s_mov_b32 s20, s24 -; GFX9-NEXT: s_mov_b32 s21, s25 -; GFX9-NEXT: s_mov_b32 s22, s26 -; GFX9-NEXT: s_mov_b32 s23, s27 -; GFX9-NEXT: s_mov_b32 s24, s28 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[40:41] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_a15i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1454,47 +902,6 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; FIXME: This should also fail define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inreg %arg0, i32 inreg %arg1) #1 { -; GFX9-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s21, s33 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-NEXT: v_writelane_b32 v40, s21, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[22:23] -; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 -; GFX9-NEXT: s_mov_b32 s3, s7 -; GFX9-NEXT: s_mov_b32 s2, s6 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s4, s8 -; GFX9-NEXT: s_mov_b32 s5, s9 -; GFX9-NEXT: s_mov_b32 s6, s10 -; GFX9-NEXT: s_mov_b32 s7, s11 -; GFX9-NEXT: s_mov_b32 s8, s15 -; GFX9-NEXT: s_mov_b32 s9, s16 -; GFX9-NEXT: s_mov_b32 s10, s17 -; GFX9-NEXT: s_mov_b32 s11, s18 -; GFX9-NEXT: s_mov_b32 s15, s19 -; GFX9-NEXT: s_mov_b32 s16, s20 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 -; GFX9-NEXT: s_mov_b32 s32, s33 -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1529,3 +936,6 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre attributes #0 = { nounwind } attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GISEL: {{.*}} +; SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index 832e43f1e1973..c407f7645315d 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -1,10 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=VI %s ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=CI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s +; RUN: llc -mtriple=amdgcn -global-isel=1 -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s declare hidden void @external_void_func_i1(i1) #0 declare hidden void @external_void_func_i1_signext(i1 signext) #0 @@ -100,24 +101,24 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i1_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i1_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i1_imm: ; GFX11: ; %bb.0: @@ -145,6 +146,25 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i1_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_i1(i1 true) ret void } @@ -196,28 +216,28 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i1_signext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i1_signext: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: v_bfe_i32 v0, v0, 0, 1 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i1_signext: ; GFX11: ; %bb.0: @@ -253,6 +273,29 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; HSA-NEXT: v_bfe_i32 v0, v0, 0, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i1_signext: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s2, -1 +; GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: v_bfe_i32 v0, v0, 0, 1 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %var = load volatile i1, ptr addrspace(1) poison call void @external_void_func_i1_signext(i1 signext %var) ret void @@ -306,28 +349,28 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i1_zeroext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i1_zeroext: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i1_zeroext: ; GFX11: ; %bb.0: @@ -363,6 +406,29 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; HSA-NEXT: v_and_b32_e32 v0, 1, v0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i1_zeroext: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s2, -1 +; GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %var = load volatile i1, ptr addrspace(1) poison call void @external_void_func_i1_zeroext(i1 zeroext %var) ret void @@ -407,24 +473,24 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i8_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i8_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_imm: ; GFX11-TRUE16: ; %bb.0: @@ -463,6 +529,25 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i8_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_i8(i8 123) ret void } @@ -513,27 +598,27 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i8_signext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i8_signext: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i8_signext: ; GFX11: ; %bb.0: @@ -567,6 +652,28 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i8_signext: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s2, -1 +; GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GISEL-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %var = load volatile i8, ptr addrspace(1) poison call void @external_void_func_i8_signext(i8 signext %var) ret void @@ -617,27 +724,27 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i8_zeroext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i8_zeroext: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i8_zeroext: ; GFX11: ; %bb.0: @@ -671,6 +778,28 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i8_zeroext: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s2, -1 +; GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %var = load volatile i8, ptr addrspace(1) poison call void @external_void_func_i8_zeroext(i8 zeroext %var) ret void @@ -715,24 +844,24 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i16_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i16_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_imm: ; GFX11-TRUE16: ; %bb.0: @@ -771,6 +900,25 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i16_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_i16(i16 123) ret void } @@ -820,27 +968,27 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i16_signext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i16_signext: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i16_signext: ; GFX11: ; %bb.0: @@ -874,6 +1022,28 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i16_signext: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s2, -1 +; GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GISEL-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %var = load volatile i16, ptr addrspace(1) poison call void @external_void_func_i16_signext(i16 signext %var) ret void @@ -924,27 +1094,27 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i16_zeroext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i16_zeroext: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i16_zeroext: ; GFX11: ; %bb.0: @@ -978,6 +1148,28 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i16_zeroext: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s2, -1 +; GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GISEL-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %var = load volatile i16, ptr addrspace(1) poison call void @external_void_func_i16_zeroext(i16 zeroext %var) ret void @@ -1022,24 +1214,24 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 42 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 42 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i32_imm: ; GFX11: ; %bb.0: @@ -1067,6 +1259,25 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 42 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_i32(i32 42) ret void } @@ -1112,25 +1323,25 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_i64_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_i64_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b +; SDAG-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i64_imm: ; GFX11: ; %bb.0: @@ -1159,6 +1370,26 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_i64_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_i64(i64 123) ret void } @@ -1208,27 +1439,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v2i64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], 0 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v2i64: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], 0 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i64: ; GFX11: ; %bb.0: @@ -1262,6 +1493,31 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v2i64: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], 0 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GISEL-NEXT: v_mov_b32_e32 v3, s3 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = load <2 x i64>, ptr addrspace(1) null call void @external_void_func_v2i64(<2 x i64> %val) ret void @@ -1312,27 +1568,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v2i64_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, 2 -; GFX9-NEXT: v_mov_b32_e32 v2, 3 -; GFX9-NEXT: v_mov_b32_e32 v3, 4 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v2i64_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1 +; SDAG-NEXT: v_mov_b32_e32 v1, 2 +; SDAG-NEXT: v_mov_b32_e32 v2, 3 +; SDAG-NEXT: v_mov_b32_e32 v3, 4 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i64_imm: ; GFX11: ; %bb.0: @@ -1364,6 +1620,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v2i64_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GISEL-NEXT: v_mov_b32_e32 v2, 3 +; GISEL-NEXT: v_mov_b32_e32 v3, 4 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v2i64(<2 x i64> ) ret void } @@ -1417,29 +1695,29 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3i64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], 0 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v4, 1 -; GFX9-NEXT: v_mov_b32_e32 v5, 2 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3i64: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], 0 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v4, 1 +; SDAG-NEXT: v_mov_b32_e32 v5, 2 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i64: ; GFX11: ; %bb.0: @@ -1476,6 +1754,33 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3i64: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], 0 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v4, 1 +; GISEL-NEXT: v_mov_b32_e32 v5, 2 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GISEL-NEXT: v_mov_b32_e32 v3, s3 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32> @@ -1536,31 +1841,31 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v4i64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], 0 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v4, 1 -; GFX9-NEXT: v_mov_b32_e32 v5, 2 -; GFX9-NEXT: v_mov_b32_e32 v6, 3 -; GFX9-NEXT: v_mov_b32_e32 v7, 4 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v4i64: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], 0 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v4, 1 +; SDAG-NEXT: v_mov_b32_e32 v5, 2 +; SDAG-NEXT: v_mov_b32_e32 v6, 3 +; SDAG-NEXT: v_mov_b32_e32 v7, 4 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i64: ; GFX11: ; %bb.0: @@ -1600,6 +1905,35 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v4i64: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], 0 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v4, 1 +; GISEL-NEXT: v_mov_b32_e32 v5, 2 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GISEL-NEXT: v_mov_b32_e32 v3, s3 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: v_mov_b32_e32 v6, 3 +; GISEL-NEXT: v_mov_b32_e32 v7, 4 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> , <4 x i32> call void @external_void_func_v4i64(<4 x i64> %val) @@ -1645,24 +1979,24 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_f16_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_f16_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0x4400 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_f16_imm: ; GFX11-TRUE16: ; %bb.0: @@ -1701,6 +2035,25 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_f16_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x4400 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_f16(half 4.0) ret void } @@ -1744,24 +2097,24 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_f32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_f32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 4.0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_f32_imm: ; GFX11: ; %bb.0: @@ -1789,6 +2142,25 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_f32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 4.0 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_f32(float 4.0) ret void } @@ -1834,25 +2206,25 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v2f32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 -; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v2f32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1.0 +; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2f32_imm: ; GFX11: ; %bb.0: @@ -1881,6 +2253,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v2f32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1.0 +; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v2f32(<2 x float> ) ret void } @@ -1928,26 +2320,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3f32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 -; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3f32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1.0 +; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 +; SDAG-NEXT: v_mov_b32_e32 v2, 4.0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3f32_imm: ; GFX11: ; %bb.0: @@ -1978,6 +2370,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3f32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1.0 +; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 +; GISEL-NEXT: v_mov_b32_e32 v2, 4.0 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v3f32(<3 x float> ) ret void } @@ -2029,28 +2442,28 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v5f32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 -; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 -; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v5f32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1.0 +; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 +; SDAG-NEXT: v_mov_b32_e32 v2, 4.0 +; SDAG-NEXT: v_mov_b32_e32 v3, -1.0 +; SDAG-NEXT: v_mov_b32_e32 v4, 0.5 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v5f32_imm: ; GFX11: ; %bb.0: @@ -2084,6 +2497,29 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v5f32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1.0 +; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 +; GISEL-NEXT: v_mov_b32_e32 v2, 4.0 +; GISEL-NEXT: v_mov_b32_e32 v3, -1.0 +; GISEL-NEXT: v_mov_b32_e32 v4, 0.5 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v5f32(<5 x float> ) ret void } @@ -2129,25 +2565,25 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_f64_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_f64_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: v_mov_b32_e32 v1, 0x40100000 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_f64_imm: ; GFX11: ; %bb.0: @@ -2176,6 +2612,26 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_f64_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: v_mov_b32_e32 v1, 0x40100000 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_f64(double 4.0) ret void } @@ -2225,27 +2681,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v2f64_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v2f64_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 +; SDAG-NEXT: v_mov_b32_e32 v2, 0 +; SDAG-NEXT: v_mov_b32_e32 v3, 0x40100000 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2f64_imm: ; GFX11: ; %bb.0: @@ -2277,6 +2733,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v2f64_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_mov_b32_e32 v3, 0x40100000 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v2f64(<2 x double> ) ret void } @@ -2330,29 +2808,29 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3f64_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3f64_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 +; SDAG-NEXT: v_mov_b32_e32 v2, 0 +; SDAG-NEXT: v_mov_b32_e32 v3, 0x40100000 +; SDAG-NEXT: v_mov_b32_e32 v4, 0 +; SDAG-NEXT: v_mov_b32_e32 v5, 0x40200000 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3f64_imm: ; GFX11: ; %bb.0: @@ -2387,6 +2865,30 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3f64_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_mov_b32_e32 v3, 0x40100000 +; GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GISEL-NEXT: v_mov_b32_e32 v5, 0x40200000 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v3f64(<3 x double> ) ret void } @@ -2436,26 +2938,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v2i16: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v2i16: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i16: ; GFX11: ; %bb.0: @@ -2487,6 +2989,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v2i16: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_load_dword s8, s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s8 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = load <2 x i16>, ptr addrspace(1) poison call void @external_void_func_v2i16(<2 x i16> %val) ret void @@ -2539,26 +3062,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3i16: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3i16: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i16: ; GFX11: ; %bb.0: @@ -2590,6 +3113,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3i16: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = load <3 x i16>, ptr addrspace(1) poison call void @external_void_func_v3i16(<3 x i16> %val) ret void @@ -2643,26 +3188,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3f16: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3f16: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3f16: ; GFX11: ; %bb.0: @@ -2694,6 +3239,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3f16: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = load <3 x half>, ptr addrspace(1) poison call void @external_void_func_v3f16(<3 x half> %val) ret void @@ -2741,25 +3308,25 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3i16_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 -; GFX9-NEXT: v_mov_b32_e32 v1, 3 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3i16_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0x20001 +; SDAG-NEXT: v_mov_b32_e32 v1, 3 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i16_imm: ; GFX11: ; %bb.0: @@ -2788,6 +3355,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3i16_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x20001 +; GISEL-NEXT: v_mov_b32_e32 v1, 3 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v3i16(<3 x i16> ) ret void } @@ -2834,25 +3421,25 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3f16_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3f16_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0x40003c00 +; SDAG-NEXT: v_mov_b32_e32 v1, 0x4400 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3f16_imm: ; GFX11: ; %bb.0: @@ -2882,6 +3469,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3f16_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x40003c00 +; GISEL-NEXT: v_mov_b32_e32 v1, 0x4400 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v3f16(<3 x half> ) ret void } @@ -2934,26 +3541,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v4i16: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v4i16: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i16: ; GFX11: ; %bb.0: @@ -2985,6 +3592,28 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v4i16: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = load <4 x i16>, ptr addrspace(1) poison call void @external_void_func_v4i16(<4 x i16> %val) ret void @@ -3033,25 +3662,25 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v4i16_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v4i16_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 0x20001 +; SDAG-NEXT: v_mov_b32_e32 v1, 0x40003 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i16_imm: ; GFX11: ; %bb.0: @@ -3081,6 +3710,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v4i16_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x20001 +; GISEL-NEXT: v_mov_b32_e32 v1, 0x40003 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v4i16(<4 x i16> ) ret void } @@ -3132,26 +3781,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v2f16: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v2f16: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2f16: ; GFX11: ; %bb.0: @@ -3183,6 +3832,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v2f16: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_load_dword s8, s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s8 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = load <2 x half>, ptr addrspace(1) poison call void @external_void_func_v2f16(<2 x half> %val) ret void @@ -3231,26 +3901,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v2i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v2i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i32: ; GFX11: ; %bb.0: @@ -3282,6 +3952,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v2i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = load <2 x i32>, ptr addrspace(1) poison call void @external_void_func_v2i32(<2 x i32> %val) ret void @@ -3328,25 +4020,25 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v2i32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, 2 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v2i32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1 +; SDAG-NEXT: v_mov_b32_e32 v1, 2 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i32_imm: ; GFX11: ; %bb.0: @@ -3375,6 +4067,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v2i32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v2i32(<2 x i32> ) ret void } @@ -3422,26 +4134,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3i32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 3 -; GFX9-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-NEXT: v_mov_b32_e32 v2, 5 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3i32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 3 +; SDAG-NEXT: v_mov_b32_e32 v1, 4 +; SDAG-NEXT: v_mov_b32_e32 v2, 5 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i32_imm: ; GFX11: ; %bb.0: @@ -3472,6 +4184,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3i32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 3 +; GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GISEL-NEXT: v_mov_b32_e32 v2, 5 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v3i32(<3 x i32> ) ret void } @@ -3521,27 +4254,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v3i32_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 3 -; GFX9-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-NEXT: v_mov_b32_e32 v2, 5 -; GFX9-NEXT: v_mov_b32_e32 v3, 6 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v3i32_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 3 +; SDAG-NEXT: v_mov_b32_e32 v1, 4 +; SDAG-NEXT: v_mov_b32_e32 v2, 5 +; SDAG-NEXT: v_mov_b32_e32 v3, 6 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i32_i32: ; GFX11: ; %bb.0: @@ -3573,6 +4306,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v3i32_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 3 +; GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GISEL-NEXT: v_mov_b32_e32 v2, 5 +; GISEL-NEXT: v_mov_b32_e32 v3, 6 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v3i32_i32(<3 x i32> , i32 6) ret void } @@ -3620,26 +4375,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v4i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v4i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i32: ; GFX11: ; %bb.0: @@ -3671,6 +4426,30 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v4i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GISEL-NEXT: v_mov_b32_e32 v3, s3 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = load <4 x i32>, ptr addrspace(1) poison call void @external_void_func_v4i32(<4 x i32> %val) ret void @@ -3721,27 +4500,27 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v4i32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, 2 -; GFX9-NEXT: v_mov_b32_e32 v2, 3 -; GFX9-NEXT: v_mov_b32_e32 v3, 4 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v4i32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1 +; SDAG-NEXT: v_mov_b32_e32 v1, 2 +; SDAG-NEXT: v_mov_b32_e32 v2, 3 +; SDAG-NEXT: v_mov_b32_e32 v3, 4 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i32_imm: ; GFX11: ; %bb.0: @@ -3773,6 +4552,28 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v4i32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GISEL-NEXT: v_mov_b32_e32 v2, 3 +; GISEL-NEXT: v_mov_b32_e32 v3, 4 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v4i32(<4 x i32> ) ret void } @@ -3824,28 +4625,28 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v5i32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, 2 -; GFX9-NEXT: v_mov_b32_e32 v2, 3 -; GFX9-NEXT: v_mov_b32_e32 v3, 4 -; GFX9-NEXT: v_mov_b32_e32 v4, 5 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v5i32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1 +; SDAG-NEXT: v_mov_b32_e32 v1, 2 +; SDAG-NEXT: v_mov_b32_e32 v2, 3 +; SDAG-NEXT: v_mov_b32_e32 v3, 4 +; SDAG-NEXT: v_mov_b32_e32 v4, 5 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v5i32_imm: ; GFX11: ; %bb.0: @@ -3879,6 +4680,29 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v5i32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GISEL-NEXT: v_mov_b32_e32 v2, 3 +; GISEL-NEXT: v_mov_b32_e32 v3, 4 +; GISEL-NEXT: v_mov_b32_e32 v4, 5 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v5i32(<5 x i32> ) ret void } @@ -3932,29 +4756,29 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v8i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v8i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v8i32: ; GFX11: ; %bb.0: @@ -3993,6 +4817,36 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v8i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[0:1], 0x0 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s8 +; GISEL-NEXT: v_mov_b32_e32 v1, s9 +; GISEL-NEXT: v_mov_b32_e32 v2, s10 +; GISEL-NEXT: v_mov_b32_e32 v3, s11 +; GISEL-NEXT: v_mov_b32_e32 v4, s12 +; GISEL-NEXT: v_mov_b32_e32 v5, s13 +; GISEL-NEXT: v_mov_b32_e32 v6, s14 +; GISEL-NEXT: v_mov_b32_e32 v7, s15 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %ptr = load ptr addrspace(1), ptr addrspace(4) poison %val = load <8 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v8i32(<8 x i32> %val) @@ -4052,31 +4906,31 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v8i32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: v_mov_b32_e32 v0, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, 2 -; GFX9-NEXT: v_mov_b32_e32 v2, 3 -; GFX9-NEXT: v_mov_b32_e32 v3, 4 -; GFX9-NEXT: v_mov_b32_e32 v4, 5 -; GFX9-NEXT: v_mov_b32_e32 v5, 6 -; GFX9-NEXT: v_mov_b32_e32 v6, 7 -; GFX9-NEXT: v_mov_b32_e32 v7, 8 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v8i32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: v_mov_b32_e32 v0, 1 +; SDAG-NEXT: v_mov_b32_e32 v1, 2 +; SDAG-NEXT: v_mov_b32_e32 v2, 3 +; SDAG-NEXT: v_mov_b32_e32 v3, 4 +; SDAG-NEXT: v_mov_b32_e32 v4, 5 +; SDAG-NEXT: v_mov_b32_e32 v5, 6 +; SDAG-NEXT: v_mov_b32_e32 v6, 7 +; SDAG-NEXT: v_mov_b32_e32 v7, 8 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v8i32_imm: ; GFX11: ; %bb.0: @@ -4114,6 +4968,32 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v8i32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GISEL-NEXT: v_mov_b32_e32 v2, 3 +; GISEL-NEXT: v_mov_b32_e32 v3, 4 +; GISEL-NEXT: v_mov_b32_e32 v4, 5 +; GISEL-NEXT: v_mov_b32_e32 v5, 6 +; GISEL-NEXT: v_mov_b32_e32 v6, 7 +; GISEL-NEXT: v_mov_b32_e32 v7, 8 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm call void @external_void_func_v8i32(<8 x i32> ) ret void } @@ -4171,31 +5051,31 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v16i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 -; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v16i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 +; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v16i32: ; GFX11: ; %bb.0: @@ -4238,6 +5118,44 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v16i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x0 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s8 +; GISEL-NEXT: v_mov_b32_e32 v1, s9 +; GISEL-NEXT: v_mov_b32_e32 v2, s10 +; GISEL-NEXT: v_mov_b32_e32 v3, s11 +; GISEL-NEXT: v_mov_b32_e32 v4, s12 +; GISEL-NEXT: v_mov_b32_e32 v5, s13 +; GISEL-NEXT: v_mov_b32_e32 v6, s14 +; GISEL-NEXT: v_mov_b32_e32 v7, s15 +; GISEL-NEXT: v_mov_b32_e32 v8, s16 +; GISEL-NEXT: v_mov_b32_e32 v9, s17 +; GISEL-NEXT: v_mov_b32_e32 v10, s18 +; GISEL-NEXT: v_mov_b32_e32 v11, s19 +; GISEL-NEXT: v_mov_b32_e32 v12, s20 +; GISEL-NEXT: v_mov_b32_e32 v13, s21 +; GISEL-NEXT: v_mov_b32_e32 v14, s22 +; GISEL-NEXT: v_mov_b32_e32 v15, s23 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %ptr = load ptr addrspace(1), ptr addrspace(4) poison %val = load <16 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v16i32(<16 x i32> %val) @@ -4309,37 +5227,37 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[8:9] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v32i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s7, 0xf000 -; GFX9-NEXT: s_mov_b32 s6, -1 -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 -; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 -; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 -; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 -; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 -; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 -; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_getpc_b64 s[8:9] -; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_waitcnt vmcnt(6) -; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v32i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SDAG-NEXT: s_mov_b32 s6, -1 +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 +; SDAG-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 +; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 +; SDAG-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 +; SDAG-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_getpc_b64 s[8:9] +; SDAG-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_waitcnt vmcnt(6) +; SDAG-NEXT: buffer_store_dword v31, off, s[36:39], s32 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v32i32: ; GFX11: ; %bb.0: @@ -4394,6 +5312,62 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 ; HSA-NEXT: s_swappc_b64 s[30:31], s[12:13] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v32i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s54, -1 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x40 +; GISEL-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s55, 0xe00000 +; GISEL-NEXT: s_add_u32 s52, s52, s3 +; GISEL-NEXT: s_addc_u32 s53, s53, 0 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s23 +; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12 +; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 +; GISEL-NEXT: v_mov_b32_e32 v0, s36 +; GISEL-NEXT: v_mov_b32_e32 v1, s37 +; GISEL-NEXT: v_mov_b32_e32 v2, s38 +; GISEL-NEXT: v_mov_b32_e32 v3, s39 +; GISEL-NEXT: v_mov_b32_e32 v4, s40 +; GISEL-NEXT: v_mov_b32_e32 v5, s41 +; GISEL-NEXT: v_mov_b32_e32 v6, s42 +; GISEL-NEXT: v_mov_b32_e32 v7, s43 +; GISEL-NEXT: v_mov_b32_e32 v8, s44 +; GISEL-NEXT: v_mov_b32_e32 v9, s45 +; GISEL-NEXT: v_mov_b32_e32 v10, s46 +; GISEL-NEXT: v_mov_b32_e32 v11, s47 +; GISEL-NEXT: v_mov_b32_e32 v12, s48 +; GISEL-NEXT: v_mov_b32_e32 v13, s49 +; GISEL-NEXT: v_mov_b32_e32 v14, s50 +; GISEL-NEXT: v_mov_b32_e32 v15, s51 +; GISEL-NEXT: v_mov_b32_e32 v16, s8 +; GISEL-NEXT: v_mov_b32_e32 v17, s9 +; GISEL-NEXT: v_mov_b32_e32 v18, s10 +; GISEL-NEXT: v_mov_b32_e32 v19, s11 +; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55] +; GISEL-NEXT: v_mov_b32_e32 v20, s12 +; GISEL-NEXT: v_mov_b32_e32 v21, s13 +; GISEL-NEXT: v_mov_b32_e32 v22, s14 +; GISEL-NEXT: v_mov_b32_e32 v23, s15 +; GISEL-NEXT: v_mov_b32_e32 v24, s16 +; GISEL-NEXT: v_mov_b32_e32 v25, s17 +; GISEL-NEXT: v_mov_b32_e32 v26, s18 +; GISEL-NEXT: v_mov_b32_e32 v27, s19 +; GISEL-NEXT: v_mov_b32_e32 v28, s20 +; GISEL-NEXT: v_mov_b32_e32 v29, s21 +; GISEL-NEXT: v_mov_b32_e32 v30, s22 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %ptr = load ptr addrspace(1), ptr addrspace(4) poison %val = load <32 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v32i32(<32 x i32> %val) @@ -4471,40 +5445,40 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v32i32_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s7, 0xf000 -; GFX9-NEXT: s_mov_b32 s6, -1 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0 -; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 -; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 -; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 -; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 -; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 -; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 -; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_waitcnt vmcnt(8) -; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 -; GFX9-NEXT: s_waitcnt vmcnt(8) -; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v32i32_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SDAG-NEXT: s_mov_b32 s6, -1 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: buffer_load_dword v32, off, s[4:7], 0 +; SDAG-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 +; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 +; SDAG-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 +; SDAG-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 +; SDAG-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_waitcnt vmcnt(8) +; SDAG-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 +; SDAG-NEXT: s_waitcnt vmcnt(8) +; SDAG-NEXT: buffer_store_dword v31, off, s[36:39], s32 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v32i32_i32: ; GFX11: ; %bb.0: @@ -4566,6 +5540,67 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v32i32_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s54, -1 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x40 +; GISEL-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x0 +; GISEL-NEXT: s_load_dword s2, s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s55, 0xe00000 +; GISEL-NEXT: s_add_u32 s52, s52, s5 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_addc_u32 s53, s53, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-NEXT: ; kill: killed $sgpr0_sgpr1 +; GISEL-NEXT: ; kill: killed $sgpr0_sgpr1 +; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 +; GISEL-NEXT: v_mov_b32_e32 v0, s23 +; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 +; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 +; GISEL-NEXT: v_mov_b32_e32 v0, s36 +; GISEL-NEXT: v_mov_b32_e32 v1, s37 +; GISEL-NEXT: v_mov_b32_e32 v2, s38 +; GISEL-NEXT: v_mov_b32_e32 v3, s39 +; GISEL-NEXT: v_mov_b32_e32 v4, s40 +; GISEL-NEXT: v_mov_b32_e32 v5, s41 +; GISEL-NEXT: v_mov_b32_e32 v6, s42 +; GISEL-NEXT: v_mov_b32_e32 v7, s43 +; GISEL-NEXT: v_mov_b32_e32 v8, s44 +; GISEL-NEXT: v_mov_b32_e32 v9, s45 +; GISEL-NEXT: v_mov_b32_e32 v10, s46 +; GISEL-NEXT: v_mov_b32_e32 v11, s47 +; GISEL-NEXT: v_mov_b32_e32 v12, s48 +; GISEL-NEXT: v_mov_b32_e32 v13, s49 +; GISEL-NEXT: v_mov_b32_e32 v14, s50 +; GISEL-NEXT: v_mov_b32_e32 v15, s51 +; GISEL-NEXT: v_mov_b32_e32 v16, s8 +; GISEL-NEXT: v_mov_b32_e32 v17, s9 +; GISEL-NEXT: v_mov_b32_e32 v18, s10 +; GISEL-NEXT: v_mov_b32_e32 v19, s11 +; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55] +; GISEL-NEXT: v_mov_b32_e32 v20, s12 +; GISEL-NEXT: v_mov_b32_e32 v21, s13 +; GISEL-NEXT: v_mov_b32_e32 v22, s14 +; GISEL-NEXT: v_mov_b32_e32 v23, s15 +; GISEL-NEXT: v_mov_b32_e32 v24, s16 +; GISEL-NEXT: v_mov_b32_e32 v25, s17 +; GISEL-NEXT: v_mov_b32_e32 v26, s18 +; GISEL-NEXT: v_mov_b32_e32 v27, s19 +; GISEL-NEXT: v_mov_b32_e32 v28, s20 +; GISEL-NEXT: v_mov_b32_e32 v29, s21 +; GISEL-NEXT: v_mov_b32_e32 v30, s22 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 %val1 = load i32, ptr addrspace(1) poison @@ -4622,29 +5657,29 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_i32_func_i32_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: v_mov_b32_e32 v0, 42 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_mov_b32 s39, 0xf000 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_i32_func_i32_imm: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s50, -1 +; SDAG-NEXT: s_mov_b32 s51, 0xe00000 +; SDAG-NEXT: s_add_u32 s48, s48, s5 +; SDAG-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 +; SDAG-NEXT: s_addc_u32 s49, s49, 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[48:49] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[50:51] +; SDAG-NEXT: v_mov_b32_e32 v0, 42 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_mov_b32 s39, 0xf000 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_i32_func_i32_imm: ; GFX11: ; %bb.0: @@ -4682,6 +5717,30 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_i32_func_i32_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s50, -1 +; GISEL-NEXT: s_mov_b32 s51, 0xe00000 +; GISEL-NEXT: s_add_u32 s48, s48, s5 +; GISEL-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 +; GISEL-NEXT: s_addc_u32 s49, s49, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[48:49] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 42 +; GISEL-NEXT: s_mov_b64 s[2:3], s[50:51] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xf000 +; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %val = call i32 @external_i32_func_i32(i32 42) store volatile i32 %val, ptr addrspace(1) %out ret void @@ -4736,29 +5795,29 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_struct_i8_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_struct_i8_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 +; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_struct_i8_i32: ; GFX11: ; %bb.0: @@ -4797,6 +5856,30 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_struct_i8_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[8:9] +; GISEL-NEXT: s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: s_endpgm %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call void @external_void_func_struct_i8_i32({ i8, i32 } %val) @@ -4860,34 +5943,34 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 3 -; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 -; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_movk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_byval_struct_i8_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: v_mov_b32_e32 v0, 3 +; SDAG-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; SDAG-NEXT: v_mov_b32_e32 v0, 8 +; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; SDAG-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_movk_i32 s32, 0x400 +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_waitcnt vmcnt(1) +; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 +; SDAG-NEXT: s_waitcnt vmcnt(1) +; SDAG-NEXT: buffer_store_dword v1, off, s[36:39], s32 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX11-TRUE16: ; %bb.0: @@ -4948,6 +6031,35 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_byval_struct_i8_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: v_mov_b32_e32 v0, 3 +; GISEL-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; GISEL-NEXT: v_mov_b32_e32 v0, 8 +; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; GISEL-NEXT: buffer_load_dword v0, off, s[36:39], 0 +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:4 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_movk_i32 s32, 0x400 +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_waitcnt vmcnt(1) +; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], s32 +; GISEL-NEXT: s_waitcnt vmcnt(1) +; GISEL-NEXT: buffer_store_dword v1, off, s[36:39], s32 offset:4 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %val = alloca { i8, i32 }, align 8, addrspace(5) %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0 %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1 @@ -5034,44 +6146,44 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 3 -; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 -; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 -; GFX9-NEXT: s_movk_i32 s32, 0x800 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32 -; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 -; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s5 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: v_mov_b32_e32 v0, 3 +; SDAG-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; SDAG-NEXT: v_mov_b32_e32 v0, 8 +; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; SDAG-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; SDAG-NEXT: s_movk_i32 s32, 0x800 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_waitcnt vmcnt(1) +; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 +; SDAG-NEXT: s_waitcnt vmcnt(1) +; SDAG-NEXT: buffer_store_dword v1, off, s[36:39], s32 +; SDAG-NEXT: v_mov_b32_e32 v0, 8 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 +; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: s_waitcnt vmcnt(1) +; SDAG-NEXT: buffer_store_byte v0, off, s[0:3], 0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], 0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX11-TRUE16: ; %bb.0: @@ -5170,6 +6282,45 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; HSA-NEXT: buffer_store_dword v1, off, s[4:7], 0 ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s5 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: v_mov_b32_e32 v0, 3 +; GISEL-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; GISEL-NEXT: v_mov_b32_e32 v0, 8 +; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; GISEL-NEXT: buffer_load_dword v0, off, s[36:39], 0 +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:4 +; GISEL-NEXT: s_movk_i32 s32, 0x800 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_waitcnt vmcnt(1) +; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], s32 +; GISEL-NEXT: s_waitcnt vmcnt(1) +; GISEL-NEXT: buffer_store_dword v1, off, s[36:39], s32 offset:4 +; GISEL-NEXT: v_mov_b32_e32 v0, 8 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 +; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 +; GISEL-NEXT: s_mov_b32 s2, -1 +; GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GISEL-NEXT: s_waitcnt vmcnt(1) +; GISEL-NEXT: buffer_store_byte v0, off, s[0:3], 0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: buffer_store_dword v1, off, s[0:3], 0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %in.val = alloca { i8, i32 }, align 8, addrspace(5) %out.val = alloca { i8, i32 }, align 8, addrspace(5) %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0 @@ -5272,47 +6423,47 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: test_call_external_void_func_v16i8: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GFX9-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-NEXT: v_mov_b32_e32 v8, v2 -; GFX9-NEXT: v_mov_b32_e32 v12, v3 -; GFX9-NEXT: v_mov_b32_e32 v1, v16 -; GFX9-NEXT: v_mov_b32_e32 v2, v17 -; GFX9-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: test_call_external_void_func_v16i8: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s38, -1 +; SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; SDAG-NEXT: s_add_u32 s36, s36, s3 +; SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SDAG-NEXT: s_mov_b32 s2, -1 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; SDAG-NEXT: s_addc_u32 s37, s37, 0 +; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_lshrrev_b32_e32 v16, 8, v0 +; SDAG-NEXT: v_lshrrev_b32_e32 v17, 16, v0 +; SDAG-NEXT: v_lshrrev_b32_e32 v18, 24, v0 +; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; SDAG-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; SDAG-NEXT: v_lshrrev_b32_e32 v9, 8, v2 +; SDAG-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; SDAG-NEXT: v_lshrrev_b32_e32 v11, 24, v2 +; SDAG-NEXT: v_lshrrev_b32_e32 v13, 8, v3 +; SDAG-NEXT: v_lshrrev_b32_e32 v14, 16, v3 +; SDAG-NEXT: v_lshrrev_b32_e32 v15, 24, v3 +; SDAG-NEXT: v_mov_b32_e32 v4, v1 +; SDAG-NEXT: v_mov_b32_e32 v8, v2 +; SDAG-NEXT: v_mov_b32_e32 v12, v3 +; SDAG-NEXT: v_mov_b32_e32 v1, v16 +; SDAG-NEXT: v_mov_b32_e32 v2, v17 +; SDAG-NEXT: v_mov_b32_e32 v3, v18 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v16i8: ; GFX11: ; %bb.0: @@ -5384,6 +6535,56 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; HSA-NEXT: v_mov_b32_e32 v3, v18 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: test_call_external_void_func_v16i8: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s38, -1 +; GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GISEL-NEXT: s_add_u32 s36, s36, s3 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_lshr_b32 s8, s0, 8 +; GISEL-NEXT: s_lshr_b32 s9, s0, 16 +; GISEL-NEXT: s_lshr_b32 s10, s0, 24 +; GISEL-NEXT: s_lshr_b32 s11, s1, 8 +; GISEL-NEXT: s_lshr_b32 s12, s1, 16 +; GISEL-NEXT: s_lshr_b32 s13, s1, 24 +; GISEL-NEXT: s_lshr_b32 s14, s2, 8 +; GISEL-NEXT: s_lshr_b32 s15, s2, 16 +; GISEL-NEXT: s_lshr_b32 s16, s2, 24 +; GISEL-NEXT: s_lshr_b32 s17, s3, 8 +; GISEL-NEXT: s_lshr_b32 s18, s3, 16 +; GISEL-NEXT: s_lshr_b32 s19, s3, 24 +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v4, s1 +; GISEL-NEXT: v_mov_b32_e32 v8, s2 +; GISEL-NEXT: v_mov_b32_e32 v12, s3 +; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v1, s8 +; GISEL-NEXT: v_mov_b32_e32 v2, s9 +; GISEL-NEXT: v_mov_b32_e32 v3, s10 +; GISEL-NEXT: v_mov_b32_e32 v5, s11 +; GISEL-NEXT: v_mov_b32_e32 v6, s12 +; GISEL-NEXT: v_mov_b32_e32 v7, s13 +; GISEL-NEXT: v_mov_b32_e32 v9, s14 +; GISEL-NEXT: v_mov_b32_e32 v10, s15 +; GISEL-NEXT: v_mov_b32_e32 v11, s16 +; GISEL-NEXT: v_mov_b32_e32 v13, s17 +; GISEL-NEXT: v_mov_b32_e32 v14, s18 +; GISEL-NEXT: v_mov_b32_e32 v15, s19 +; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm %ptr = load ptr addrspace(1), ptr addrspace(4) poison %val = load <16 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v16i8(<16 x i8> %val) @@ -5509,64 +6710,64 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64: -; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s54, -1 -; GFX9-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-NEXT: s_add_u32 s52, s52, s5 -; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_addc_u32 s53, s53, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s23 -; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 -; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 -; GFX9-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55] -; GFX9-NEXT: v_mov_b32_e32 v0, s36 -; GFX9-NEXT: v_mov_b32_e32 v1, s37 -; GFX9-NEXT: v_mov_b32_e32 v2, s38 -; GFX9-NEXT: v_mov_b32_e32 v3, s39 -; GFX9-NEXT: v_mov_b32_e32 v4, s40 -; GFX9-NEXT: v_mov_b32_e32 v5, s41 -; GFX9-NEXT: v_mov_b32_e32 v6, s42 -; GFX9-NEXT: v_mov_b32_e32 v7, s43 -; GFX9-NEXT: v_mov_b32_e32 v8, s44 -; GFX9-NEXT: v_mov_b32_e32 v9, s45 -; GFX9-NEXT: v_mov_b32_e32 v10, s46 -; GFX9-NEXT: v_mov_b32_e32 v11, s47 -; GFX9-NEXT: v_mov_b32_e32 v12, s48 -; GFX9-NEXT: v_mov_b32_e32 v13, s49 -; GFX9-NEXT: v_mov_b32_e32 v14, s50 -; GFX9-NEXT: v_mov_b32_e32 v15, s51 -; GFX9-NEXT: v_mov_b32_e32 v16, s8 -; GFX9-NEXT: v_mov_b32_e32 v17, s9 -; GFX9-NEXT: v_mov_b32_e32 v18, s10 -; GFX9-NEXT: v_mov_b32_e32 v19, s11 -; GFX9-NEXT: v_mov_b32_e32 v20, s12 -; GFX9-NEXT: v_mov_b32_e32 v21, s13 -; GFX9-NEXT: v_mov_b32_e32 v22, s14 -; GFX9-NEXT: v_mov_b32_e32 v23, s15 -; GFX9-NEXT: v_mov_b32_e32 v24, s16 -; GFX9-NEXT: v_mov_b32_e32 v25, s17 -; GFX9-NEXT: v_mov_b32_e32 v26, s18 -; GFX9-NEXT: v_mov_b32_e32 v27, s19 -; GFX9-NEXT: v_mov_b32_e32 v28, s20 -; GFX9-NEXT: v_mov_b32_e32 v29, s21 -; GFX9-NEXT: v_mov_b32_e32 v30, s22 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm +; SDAG-LABEL: stack_passed_arg_alignment_v32i32_f64: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; SDAG-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; SDAG-NEXT: s_mov_b32 s54, -1 +; SDAG-NEXT: s_mov_b32 s55, 0xe00000 +; SDAG-NEXT: s_add_u32 s52, s52, s5 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 +; SDAG-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_addc_u32 s53, s53, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v0, s23 +; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32 +; SDAG-NEXT: v_mov_b32_e32 v0, s4 +; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 +; SDAG-NEXT: v_mov_b32_e32 v0, s5 +; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] +; SDAG-NEXT: s_mov_b64 s[0:1], s[52:53] +; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[2:3], s[54:55] +; SDAG-NEXT: v_mov_b32_e32 v0, s36 +; SDAG-NEXT: v_mov_b32_e32 v1, s37 +; SDAG-NEXT: v_mov_b32_e32 v2, s38 +; SDAG-NEXT: v_mov_b32_e32 v3, s39 +; SDAG-NEXT: v_mov_b32_e32 v4, s40 +; SDAG-NEXT: v_mov_b32_e32 v5, s41 +; SDAG-NEXT: v_mov_b32_e32 v6, s42 +; SDAG-NEXT: v_mov_b32_e32 v7, s43 +; SDAG-NEXT: v_mov_b32_e32 v8, s44 +; SDAG-NEXT: v_mov_b32_e32 v9, s45 +; SDAG-NEXT: v_mov_b32_e32 v10, s46 +; SDAG-NEXT: v_mov_b32_e32 v11, s47 +; SDAG-NEXT: v_mov_b32_e32 v12, s48 +; SDAG-NEXT: v_mov_b32_e32 v13, s49 +; SDAG-NEXT: v_mov_b32_e32 v14, s50 +; SDAG-NEXT: v_mov_b32_e32 v15, s51 +; SDAG-NEXT: v_mov_b32_e32 v16, s8 +; SDAG-NEXT: v_mov_b32_e32 v17, s9 +; SDAG-NEXT: v_mov_b32_e32 v18, s10 +; SDAG-NEXT: v_mov_b32_e32 v19, s11 +; SDAG-NEXT: v_mov_b32_e32 v20, s12 +; SDAG-NEXT: v_mov_b32_e32 v21, s13 +; SDAG-NEXT: v_mov_b32_e32 v22, s14 +; SDAG-NEXT: v_mov_b32_e32 v23, s15 +; SDAG-NEXT: v_mov_b32_e32 v24, s16 +; SDAG-NEXT: v_mov_b32_e32 v25, s17 +; SDAG-NEXT: v_mov_b32_e32 v26, s18 +; SDAG-NEXT: v_mov_b32_e32 v27, s19 +; SDAG-NEXT: v_mov_b32_e32 v28, s20 +; SDAG-NEXT: v_mov_b32_e32 v29, s21 +; SDAG-NEXT: v_mov_b32_e32 v30, s22 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX11: ; %bb.0: ; %entry @@ -5662,6 +6863,65 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; HSA-NEXT: v_mov_b32_e32 v30, s22 ; HSA-NEXT: s_swappc_b64 s[30:31], s[24:25] ; HSA-NEXT: s_endpgm +; +; GISEL-LABEL: stack_passed_arg_alignment_v32i32_f64: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; GISEL-NEXT: s_mov_b32 s54, -1 +; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xa4 +; GISEL-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 +; GISEL-NEXT: s_mov_b32 s55, 0xe00000 +; GISEL-NEXT: s_add_u32 s52, s52, s5 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_addc_u32 s53, s53, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, s23 +; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 +; GISEL-NEXT: v_mov_b32_e32 v0, s1 +; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53] +; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, s36 +; GISEL-NEXT: v_mov_b32_e32 v1, s37 +; GISEL-NEXT: v_mov_b32_e32 v2, s38 +; GISEL-NEXT: v_mov_b32_e32 v3, s39 +; GISEL-NEXT: v_mov_b32_e32 v4, s40 +; GISEL-NEXT: v_mov_b32_e32 v5, s41 +; GISEL-NEXT: v_mov_b32_e32 v6, s42 +; GISEL-NEXT: v_mov_b32_e32 v7, s43 +; GISEL-NEXT: v_mov_b32_e32 v8, s44 +; GISEL-NEXT: v_mov_b32_e32 v9, s45 +; GISEL-NEXT: v_mov_b32_e32 v10, s46 +; GISEL-NEXT: v_mov_b32_e32 v11, s47 +; GISEL-NEXT: v_mov_b32_e32 v12, s48 +; GISEL-NEXT: v_mov_b32_e32 v13, s49 +; GISEL-NEXT: v_mov_b32_e32 v14, s50 +; GISEL-NEXT: v_mov_b32_e32 v15, s51 +; GISEL-NEXT: v_mov_b32_e32 v16, s8 +; GISEL-NEXT: v_mov_b32_e32 v17, s9 +; GISEL-NEXT: v_mov_b32_e32 v18, s10 +; GISEL-NEXT: v_mov_b32_e32 v19, s11 +; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55] +; GISEL-NEXT: v_mov_b32_e32 v20, s12 +; GISEL-NEXT: v_mov_b32_e32 v21, s13 +; GISEL-NEXT: v_mov_b32_e32 v22, s14 +; GISEL-NEXT: v_mov_b32_e32 v23, s15 +; GISEL-NEXT: v_mov_b32_e32 v24, s16 +; GISEL-NEXT: v_mov_b32_e32 v25, s17 +; GISEL-NEXT: v_mov_b32_e32 v26, s18 +; GISEL-NEXT: v_mov_b32_e32 v27, s19 +; GISEL-NEXT: v_mov_b32_e32 v28, s20 +; GISEL-NEXT: v_mov_b32_e32 v29, s21 +; GISEL-NEXT: v_mov_b32_e32 v30, s22 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_endpgm entry: call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp) ret void @@ -5702,22 +6962,22 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 ; CI-NEXT: s_setpc_b64 s[4:5] ; -; GFX9-LABEL: tail_call_byval_align16: -; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 -; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 -; GFX9-NEXT: s_setpc_b64 s[4:5] +; SDAG-LABEL: tail_call_byval_align16: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 +; SDAG-NEXT: buffer_load_dword v32, off, s[0:3], s32 +; SDAG-NEXT: s_getpc_b64 s[4:5] +; SDAG-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 +; SDAG-NEXT: s_waitcnt vmcnt(1) +; SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 +; SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 +; SDAG-NEXT: s_waitcnt vmcnt(2) +; SDAG-NEXT: buffer_store_dword v32, off, s[0:3], s32 +; SDAG-NEXT: s_waitcnt vmcnt(1) +; SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 +; SDAG-NEXT: s_setpc_b64 s[4:5] ; ; GFX11-LABEL: tail_call_byval_align16: ; GFX11: ; %bb.0: ; %entry @@ -5749,6 +7009,23 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; HSA-NEXT: s_waitcnt vmcnt(1) ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 ; HSA-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-LABEL: tail_call_byval_align16: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GISEL-NEXT: s_getpc_b64 s[4:5] +; GISEL-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 +; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 +; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 +; GISEL-NEXT: s_setpc_b64 s[4:5] entry: %alloca = alloca double, align 8, addrspace(5) tail call void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca) diff --git a/llvm/test/CodeGen/AMDGPU/call-c-function.ll b/llvm/test/CodeGen/AMDGPU/call-c-function.ll index e1bb3eab25efd..4fbc7271ba0c5 100644 --- a/llvm/test/CodeGen/AMDGPU/call-c-function.ll +++ b/llvm/test/CodeGen/AMDGPU/call-c-function.ll @@ -1,21 +1,68 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel=0 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel=0 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=SDAG -enable-var-scope %s +; RUN: llc -global-isel=1 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GISEL -enable-var-scope %s ; Test that we don't explode on calls from shaders to functions with the C calling convention. define amdgpu_ps void @amdgpu_ps_call_default_cc() { - ; CHECK-LABEL: name: amdgpu_ps_call_default_cc - ; CHECK: bb.0.main_body: - ; CHECK-NEXT: S_ENDPGM 0 + ; SDAG-LABEL: name: amdgpu_ps_call_default_cc + ; SDAG: bb.0.main_body: + ; SDAG-NEXT: S_ENDPGM 0 + ; + ; GISEL-LABEL: name: amdgpu_ps_call_default_cc + ; GISEL: bb.1.main_body: + ; GISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GISEL-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GISEL-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GISEL-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GISEL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + ; GISEL-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]] + ; GISEL-NEXT: $sgpr4_sgpr5 = COPY [[DEF]] + ; GISEL-NEXT: $sgpr6_sgpr7 = COPY [[DEF]] + ; GISEL-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0 + ; GISEL-NEXT: $sgpr8_sgpr9 = COPY [[S_MOV_B]] + ; GISEL-NEXT: $sgpr10_sgpr11 = COPY [[DEF1]] + ; GISEL-NEXT: $sgpr12 = COPY [[DEF2]] + ; GISEL-NEXT: $sgpr13 = COPY [[DEF2]] + ; GISEL-NEXT: $sgpr14 = COPY [[DEF2]] + ; GISEL-NEXT: $sgpr15 = COPY [[DEF2]] + ; GISEL-NEXT: $vgpr31 = COPY [[DEF2]] + ; GISEL-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0 + ; GISEL-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_MOV_B1]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 + ; GISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GISEL-NEXT: S_ENDPGM 0 main_body: call void null() ret void } define amdgpu_gfx void @amdgpu_gfx_call_default_cc() { - ; CHECK-LABEL: name: amdgpu_gfx_call_default_cc - ; CHECK: bb.0.main_body: - ; CHECK-NEXT: SI_RETURN + ; SDAG-LABEL: name: amdgpu_gfx_call_default_cc + ; SDAG: bb.0.main_body: + ; SDAG-NEXT: SI_RETURN + ; + ; GISEL-LABEL: name: amdgpu_gfx_call_default_cc + ; GISEL: bb.1.main_body: + ; GISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GISEL-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GISEL-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GISEL-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GISEL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GISEL-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]] + ; GISEL-NEXT: $sgpr4_sgpr5 = COPY [[DEF]] + ; GISEL-NEXT: $sgpr6_sgpr7 = COPY [[DEF]] + ; GISEL-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0 + ; GISEL-NEXT: $sgpr8_sgpr9 = COPY [[S_MOV_B]] + ; GISEL-NEXT: $sgpr10_sgpr11 = COPY [[DEF1]] + ; GISEL-NEXT: $sgpr12 = COPY [[DEF2]] + ; GISEL-NEXT: $sgpr13 = COPY [[DEF2]] + ; GISEL-NEXT: $sgpr14 = COPY [[DEF2]] + ; GISEL-NEXT: $sgpr15 = COPY [[DEF2]] + ; GISEL-NEXT: $vgpr31 = COPY [[DEF2]] + ; GISEL-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0 + ; GISEL-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_MOV_B1]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 + ; GISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GISEL-NEXT: SI_RETURN main_body: call void null() ret void diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll index 5f324df30f7e2..fe0b0188d2d37 100644 --- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll @@ -1,84 +1,341 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,GISEL %s -; GCN-LABEL: {{^}}test_bitcast_return_type_noinline: -; GCN: s_getpc_b64 -; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@lo+4 -; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@hi+12 -; GCN: s_swappc_b64 define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 { +; SDAG-LABEL: test_bitcast_return_type_noinline: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 +; SDAG-NEXT: s_add_i32 s12, s12, s17 +; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; SDAG-NEXT: s_add_u32 s0, s0, s17 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_mov_b32 s13, s15 +; SDAG-NEXT: s_mov_b32 s12, s14 +; SDAG-NEXT: s_getpc_b64 s[18:19] +; SDAG-NEXT: s_add_u32 s18, s18, ret_i32_noinline@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s19, s19, ret_i32_noinline@rel32@hi+12 +; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-NEXT: v_or_b32_e32 v31, v0, v2 +; SDAG-NEXT: s_mov_b32 s14, s16 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] +; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SDAG-NEXT: flat_store_dword v[0:1], v0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_bitcast_return_type_noinline: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL-NEXT: s_add_i32 s12, s12, s17 +; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL-NEXT: s_add_u32 s0, s0, s17 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: s_mov_b32 s13, s15 +; GISEL-NEXT: s_mov_b32 s12, s14 +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 +; GISEL-NEXT: s_getpc_b64 s[18:19] +; GISEL-NEXT: s_add_u32 s18, s18, ret_i32_noinline@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s19, s19, ret_i32_noinline@rel32@hi+12 +; GISEL-NEXT: s_mov_b32 s14, s16 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GISEL-NEXT: flat_store_dword v[0:1], v0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %val = call float @ret_i32_noinline() %op = fadd float %val, 1.0 store volatile float %op, ptr addrspace(1) poison ret void } -; GCN-LABEL: {{^}}test_bitcast_return_type_alwaysinline: -; GCN: s_swappc_b64 define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 { +; SDAG-LABEL: test_bitcast_return_type_alwaysinline: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 +; SDAG-NEXT: s_add_i32 s12, s12, s17 +; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; SDAG-NEXT: s_add_u32 s0, s0, s17 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_mov_b32 s13, s15 +; SDAG-NEXT: s_mov_b32 s12, s14 +; SDAG-NEXT: s_getpc_b64 s[18:19] +; SDAG-NEXT: s_add_u32 s18, s18, ret_i32_alwaysinline@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s19, s19, ret_i32_alwaysinline@rel32@hi+12 +; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-NEXT: v_or_b32_e32 v31, v0, v2 +; SDAG-NEXT: s_mov_b32 s14, s16 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] +; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SDAG-NEXT: flat_store_dword v[0:1], v0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_bitcast_return_type_alwaysinline: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL-NEXT: s_add_i32 s12, s12, s17 +; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL-NEXT: s_add_u32 s0, s0, s17 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: s_mov_b32 s13, s15 +; GISEL-NEXT: s_mov_b32 s12, s14 +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 +; GISEL-NEXT: s_getpc_b64 s[18:19] +; GISEL-NEXT: s_add_u32 s18, s18, ret_i32_alwaysinline@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s19, s19, ret_i32_alwaysinline@rel32@hi+12 +; GISEL-NEXT: s_mov_b32 s14, s16 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GISEL-NEXT: flat_store_dword v[0:1], v0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %val = call float @ret_i32_alwaysinline() %op = fadd float %val, 1.0 store volatile float %op, ptr addrspace(1) poison ret void } -; GCN-LABEL: {{^}}test_bitcast_argument_type: -; GCN: s_getpc_b64 -; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4 -; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12 -; GCN: s_swappc_b64 define amdgpu_kernel void @test_bitcast_argument_type() #0 { +; SDAG-LABEL: test_bitcast_argument_type: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 +; SDAG-NEXT: s_add_i32 s12, s12, s17 +; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; SDAG-NEXT: s_add_u32 s0, s0, s17 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_mov_b32 s13, s15 +; SDAG-NEXT: s_mov_b32 s12, s14 +; SDAG-NEXT: s_getpc_b64 s[18:19] +; SDAG-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12 +; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-NEXT: v_or_b32_e32 v31, v0, v2 +; SDAG-NEXT: v_mov_b32_e32 v0, 2.0 +; SDAG-NEXT: s_mov_b32 s14, s16 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] +; SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 +; SDAG-NEXT: flat_store_dword v[0:1], v0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_bitcast_argument_type: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL-NEXT: s_add_i32 s12, s12, s17 +; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL-NEXT: s_add_u32 s0, s0, s17 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: s_mov_b32 s13, s15 +; GISEL-NEXT: s_mov_b32 s12, s14 +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GISEL-NEXT: s_getpc_b64 s[18:19] +; GISEL-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 +; GISEL-NEXT: v_mov_b32_e32 v0, 2.0 +; GISEL-NEXT: s_mov_b32 s14, s16 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 +; GISEL-NEXT: flat_store_dword v[0:1], v0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %val = call i32 @ident_i32(float 2.0) %op = add i32 %val, 1 store volatile i32 %op, ptr addrspace(1) poison ret void } -; GCN-LABEL: {{^}}test_bitcast_argument_and_return_types: -; GCN: s_getpc_b64 -; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4 -; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12 -; GCN: s_swappc_b64 define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 { +; SDAG-LABEL: test_bitcast_argument_and_return_types: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 +; SDAG-NEXT: s_add_i32 s12, s12, s17 +; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; SDAG-NEXT: s_add_u32 s0, s0, s17 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_mov_b32 s13, s15 +; SDAG-NEXT: s_mov_b32 s12, s14 +; SDAG-NEXT: s_getpc_b64 s[18:19] +; SDAG-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12 +; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-NEXT: v_or_b32_e32 v31, v0, v2 +; SDAG-NEXT: v_mov_b32_e32 v0, 2.0 +; SDAG-NEXT: s_mov_b32 s14, s16 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] +; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SDAG-NEXT: flat_store_dword v[0:1], v0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_bitcast_argument_and_return_types: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL-NEXT: s_add_i32 s12, s12, s17 +; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL-NEXT: s_add_u32 s0, s0, s17 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: s_mov_b32 s13, s15 +; GISEL-NEXT: s_mov_b32 s12, s14 +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GISEL-NEXT: s_getpc_b64 s[18:19] +; GISEL-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 +; GISEL-NEXT: v_mov_b32_e32 v0, 2.0 +; GISEL-NEXT: s_mov_b32 s14, s16 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GISEL-NEXT: flat_store_dword v[0:1], v0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %val = call float @ident_i32(float 2.0) %op = fadd float %val, 1.0 store volatile float %op, ptr addrspace(1) poison ret void } -; GCN-LABEL: {{^}}use_workitem_id_x: -; GCN: s_waitcnt -; GCN-NEXT: v_and_b32_e32 [[TMP:v[0-9]+]], 0x3ff, v31 -; GCN-NEXT: v_add_i32_e32 v0, vcc, [[TMP]], v0 -; GCN-NEXT: s_setpc_b64 define hidden i32 @use_workitem_id_x(i32 %arg0) #3 { +; GCN-LABEL: use_workitem_id_x: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v1, 0x3ff, v31 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] %id = call i32 @llvm.amdgcn.workitem.id.x() %op = add i32 %id, %arg0 ret i32 %op } -; GCN-LABEL: {{^}}test_bitcast_use_workitem_id_x: -; GCN: v_mov_b32_e32 v31, v0 -; GCN: s_getpc_b64 -; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@lo+4 -; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@hi+12 -; GCN: v_mov_b32_e32 v0, 9 -; GCN: s_swappc_b64 -; GCN: v_add_f32_e32 define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #3 { +; SDAG-LABEL: test_bitcast_use_workitem_id_x: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 +; SDAG-NEXT: s_add_i32 s12, s12, s17 +; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; SDAG-NEXT: s_add_u32 s0, s0, s17 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_mov_b32 s13, s15 +; SDAG-NEXT: s_mov_b32 s12, s14 +; SDAG-NEXT: v_mov_b32_e32 v31, v0 +; SDAG-NEXT: s_getpc_b64 s[18:19] +; SDAG-NEXT: s_add_u32 s18, s18, use_workitem_id_x@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s19, s19, use_workitem_id_x@rel32@hi+12 +; SDAG-NEXT: v_mov_b32_e32 v0, 9 +; SDAG-NEXT: s_mov_b32 s14, s16 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] +; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SDAG-NEXT: flat_store_dword v[0:1], v0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_bitcast_use_workitem_id_x: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL-NEXT: s_add_i32 s12, s12, s17 +; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL-NEXT: s_add_u32 s0, s0, s17 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: v_mov_b32_e32 v31, v0 +; GISEL-NEXT: s_mov_b32 s13, s15 +; GISEL-NEXT: s_mov_b32 s12, s14 +; GISEL-NEXT: s_getpc_b64 s[18:19] +; GISEL-NEXT: s_add_u32 s18, s18, use_workitem_id_x@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s19, s19, use_workitem_id_x@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 9 +; GISEL-NEXT: s_mov_b32 s14, s16 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GISEL-NEXT: flat_store_dword v[0:1], v0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %val = call float @use_workitem_id_x(i32 9) %op = fadd float %val, 1.0 store volatile float %op, ptr addrspace(1) poison ret void } -; GCN-LABEL: {{^}}test_invoke: -; GCN: s_getpc_b64 -; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4 -; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12 -; GCN: s_swappc_b64 @_ZTIi = external global ptr declare i32 @__gxx_personality_v0(...) define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v0 { +; SDAG-LABEL: test_invoke: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 +; SDAG-NEXT: s_add_i32 s12, s12, s17 +; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; SDAG-NEXT: s_add_u32 s0, s0, s17 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_mov_b32 s13, s15 +; SDAG-NEXT: s_mov_b32 s12, s14 +; SDAG-NEXT: s_getpc_b64 s[18:19] +; SDAG-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12 +; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-NEXT: v_or_b32_e32 v31, v0, v2 +; SDAG-NEXT: v_mov_b32_e32 v0, 2.0 +; SDAG-NEXT: s_mov_b32 s14, s16 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] +; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SDAG-NEXT: flat_store_dword v[0:1], v0 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_invoke: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL-NEXT: s_add_i32 s12, s12, s17 +; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL-NEXT: s_add_u32 s0, s0, s17 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: s_mov_b32 s13, s15 +; GISEL-NEXT: s_mov_b32 s12, s14 +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GISEL-NEXT: s_getpc_b64 s[18:19] +; GISEL-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 +; GISEL-NEXT: v_mov_b32_e32 v0, 2.0 +; GISEL-NEXT: s_mov_b32 s14, s16 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GISEL-NEXT: flat_store_dword v[0:1], v0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %val = invoke float @ident_i32(float 2.0) to label %continue unwind label %broken @@ -96,14 +353,28 @@ continue: ; arguments before we lower any calls to them. define hidden i32 @ret_i32_noinline() #0 { +; GCN-LABEL: ret_i32_noinline: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 4 +; GCN-NEXT: s_setpc_b64 s[30:31] ret i32 4 } define hidden i32 @ret_i32_alwaysinline() #1 { +; GCN-LABEL: ret_i32_alwaysinline: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 4 +; GCN-NEXT: s_setpc_b64 s[30:31] ret i32 4 } define hidden i32 @ident_i32(i32 %i) #0 { +; GCN-LABEL: ident_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] ret i32 %i } diff --git a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll index ffe536d347c53..4b5a49fc0c2e9 100644 --- a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll +++ b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=GISEL %s ; Check that call / asm get an implicit-def $mode added to them in ; strictfp functions. @@ -7,46 +8,80 @@ declare protected void @maybe_defs_mode() #0 define float @call_changes_mode(float %x, float %y) #0 { - ; CHECK-LABEL: name: call_changes_mode - ; CHECK: bb.0 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] - ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $mode - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; SDAG-LABEL: name: call_changes_mode + ; SDAG: bb.0 (%ir-block.0): + ; SDAG-NEXT: liveins: $vgpr0, $vgpr1 + ; SDAG-NEXT: {{ $}} + ; SDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; SDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SDAG-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; SDAG-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc + ; SDAG-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; SDAG-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] + ; SDAG-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $mode + ; SDAG-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; SDAG-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; SDAG-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; SDAG-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GISEL-LABEL: name: call_changes_mode + ; GISEL: bb.1 (%ir-block.0): + ; GISEL-NEXT: liveins: $vgpr0, $vgpr1 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GISEL-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GISEL-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] + ; GISEL-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def $scc + ; GISEL-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GISEL-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GISEL-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GISEL-NEXT: SI_RETURN implicit $vgpr0 call void @maybe_defs_mode() %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") ret float %val } define void @tail_call_changes_mode() #0 { - ; CHECK-LABEL: name: tail_call_changes_mode - ; CHECK: bb.0 (%ir-block.0): - ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc - ; CHECK-NEXT: SI_TCRETURN killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit-def $mode + ; SDAG-LABEL: name: tail_call_changes_mode + ; SDAG: bb.0 (%ir-block.0): + ; SDAG-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc + ; SDAG-NEXT: SI_TCRETURN killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit-def $mode + ; + ; GISEL-LABEL: name: tail_call_changes_mode + ; GISEL: bb.1 (%ir-block.0): + ; GISEL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GISEL-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]] + ; GISEL-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def $scc + ; GISEL-NEXT: SI_TCRETURN [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3 tail call void @maybe_defs_mode() ret void } define float @asm_changes_mode(float %x, float %y) #0 { - ; CHECK-LABEL: name: asm_changes_mode - ; CHECK: bb.0 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode - ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; SDAG-LABEL: name: asm_changes_mode + ; SDAG: bb.0 (%ir-block.0): + ; SDAG-NEXT: liveins: $vgpr0, $vgpr1 + ; SDAG-NEXT: {{ $}} + ; SDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; SDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode + ; SDAG-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; SDAG-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; SDAG-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GISEL-LABEL: name: asm_changes_mode + ; GISEL: bb.1 (%ir-block.0): + ; GISEL-NEXT: liveins: $vgpr0, $vgpr1 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode + ; GISEL-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GISEL-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GISEL-NEXT: SI_RETURN implicit $vgpr0 call void asm sideeffect "; maybe defs mode", ""() %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/call-encoding.ll b/llvm/test/CodeGen/AMDGPU/call-encoding.ll index 6954c340ca287..6c36c2424a66e 100644 --- a/llvm/test/CodeGen/AMDGPU/call-encoding.ll +++ b/llvm/test/CodeGen/AMDGPU/call-encoding.ll @@ -1,5 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s ; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=hawaii -d - | FileCheck --check-prefixes=GCN,CI %s ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index 4df10497bcd27..b250227735bd3 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -1,8 +1,13 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s ; Make sure to run a GPU with the SGPR allocation bug. diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index 61a195f9c314f..aed1079158154 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR,SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR,GISEL %s declare hidden void @external_void_func_void() #3 @@ -223,41 +227,6 @@ define hidden void @void_func_void_clobber_vcc() #2 { } define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 { -; FLATSCR-LABEL: test_call_void_func_void_clobber_vcc: -; FLATSCR: ; %bb.0: -; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13 -; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; FLATSCR-NEXT: s_add_u32 s8, s4, 8 -; FLATSCR-NEXT: s_addc_u32 s9, s5, 0 -; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; FLATSCR-NEXT: s_mov_b32 s14, s12 -; FLATSCR-NEXT: s_mov_b32 s13, s11 -; FLATSCR-NEXT: s_mov_b32 s12, s10 -; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7] -; FLATSCR-NEXT: s_getpc_b64 s[16:17] -; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_vcc@rel32@lo+4 -; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_vcc@rel32@hi+12 -; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2 -; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1] -; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3] -; FLATSCR-NEXT: s_mov_b32 s32, 0 -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; def vcc -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_mov_b64 s[34:35], vcc -; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FLATSCR-NEXT: global_load_dword v0, v[0:1], off glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_mov_b64 vcc, s[34:35] -; FLATSCR-NEXT: global_load_dword v0, v[0:1], off glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: ; kill: killed $vgpr0_vgpr1 -; FLATSCR-NEXT: ; kill: killed $vgpr0_vgpr1 -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; use vcc -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_endpgm %vcc = call i64 asm sideeffect "; def $0", "={vcc}"() call void @void_func_void_clobber_vcc() %val0 = load volatile i32, ptr addrspace(1) poison @@ -463,51 +432,11 @@ define hidden void @void_func_void_clobber_s34() #2 { } define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 { -; FLATSCR-LABEL: test_call_void_func_void_clobber_s33: -; FLATSCR: ; %bb.0: -; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13 -; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; FLATSCR-NEXT: s_mov_b32 s14, s12 -; FLATSCR-NEXT: s_mov_b32 s13, s11 -; FLATSCR-NEXT: s_mov_b32 s12, s10 -; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7] -; FLATSCR-NEXT: s_mov_b64 s[8:9], s[4:5] -; FLATSCR-NEXT: s_getpc_b64 s[16:17] -; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_s33@rel32@lo+4 -; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_s33@rel32@hi+12 -; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2 -; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1] -; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3] -; FLATSCR-NEXT: s_mov_b32 s32, 0 -; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FLATSCR-NEXT: s_endpgm call void @void_func_void_clobber_s33() ret void } define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 { -; FLATSCR-LABEL: test_call_void_func_void_clobber_s34: -; FLATSCR: ; %bb.0: -; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13 -; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; FLATSCR-NEXT: s_mov_b32 s14, s12 -; FLATSCR-NEXT: s_mov_b32 s13, s11 -; FLATSCR-NEXT: s_mov_b32 s12, s10 -; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7] -; FLATSCR-NEXT: s_mov_b64 s[8:9], s[4:5] -; FLATSCR-NEXT: s_getpc_b64 s[16:17] -; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_s34@rel32@lo+4 -; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_s34@rel32@hi+12 -; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2 -; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1] -; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3] -; FLATSCR-NEXT: s_mov_b32 s32, 0 -; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FLATSCR-NEXT: s_endpgm call void @void_func_void_clobber_s34() ret void } @@ -748,3 +677,6 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { nounwind noinline } attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GISEL: {{.*}} +; SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/call-return-types.ll b/llvm/test/CodeGen/AMDGPU/call-return-types.ll index c0f74fd85f0e6..21c3696ae98a9 100644 --- a/llvm/test/CodeGen/AMDGPU/call-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-return-types.ll @@ -1,7 +1,12 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s + +; Ideally, we would also like to test GlobalISel with gfx11 but we are currently blocked on llvm-project#166501. declare void @external_void_func_void() #0 diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll index ea2bba1673a0b..e2ca278d687be 100644 --- a/llvm/test/CodeGen/AMDGPU/call-skip.ll +++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll @@ -1,4 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=0 -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=1 -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s ; A call should be skipped if all lanes are zero, since we don't know ; what side effects should be avoided inside the call. @@ -6,12 +8,37 @@ define hidden void @func() #1 { ret void } -; GCN-LABEL: {{^}}if_call: -; GCN: s_and_saveexec_b64 -; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]] -; GCN: s_swappc_b64 -; GCN: [[END]]: define void @if_call(i32 %flag) #0 { +; GCN-LABEL: if_call: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s20, s33 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_xor_saveexec_b64 s[16:17], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[16:17] +; GCN-NEXT: v_writelane_b32 v1, s30, 0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v1, s31, 1 +; GCN-NEXT: s_and_saveexec_b64 s[16:17], vcc +; GCN-NEXT: s_cbranch_execz .LBB1_2 +; GCN-NEXT: ; %bb.1: ; %call +; GCN-NEXT: s_getpc_b64 s[18:19] +; GCN-NEXT: s_add_u32 s18, s18, func@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GCN-NEXT: .LBB1_2: ; %end +; GCN-NEXT: s_or_b64 exec, exec, s[16:17] +; GCN-NEXT: v_readlane_b32 s31, v1, 1 +; GCN-NEXT: v_readlane_b32 s30, v1, 0 +; GCN-NEXT: s_mov_b32 s32, s33 +; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s20 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] %cc = icmp eq i32 %flag, 0 br i1 %cc, label %call, label %end @@ -23,12 +50,20 @@ end: ret void } -; GCN-LABEL: {{^}}if_asm: -; GCN: s_and_saveexec_b64 -; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]] -; GCN: ; sample asm -; GCN: [[END]]: define void @if_asm(i32 %flag) #0 { +; GCN-LABEL: if_asm: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_cbranch_execz .LBB2_2 +; GCN-NEXT: ; %bb.1: ; %call +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; sample asm +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: .LBB2_2: ; %end +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] %cc = icmp eq i32 %flag, 0 br i1 %cc, label %call, label %end @@ -40,11 +75,58 @@ end: ret void } -; GCN-LABEL: {{^}}if_call_kernel: -; GCN: s_and_saveexec_b64 -; GCN-NEXT: s_cbranch_execz .LBB3_2 -; GCN: s_swappc_b64 define amdgpu_kernel void @if_call_kernel() #0 { +; SDAG-LABEL: if_call_kernel: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_add_i32 s12, s12, s17 +; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; SDAG-NEXT: s_add_u32 s0, s0, s17 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 +; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc +; SDAG-NEXT: s_cbranch_execz .LBB3_2 +; SDAG-NEXT: ; %bb.1: ; %call +; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-NEXT: s_getpc_b64 s[18:19] +; SDAG-NEXT: s_add_u32 s18, s18, func@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12 +; SDAG-NEXT: v_or_b32_e32 v31, v0, v2 +; SDAG-NEXT: s_mov_b32 s12, s14 +; SDAG-NEXT: s_mov_b32 s13, s15 +; SDAG-NEXT: s_mov_b32 s14, s16 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] +; SDAG-NEXT: .LBB3_2: ; %end +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: if_call_kernel: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_add_i32 s12, s12, s17 +; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL-NEXT: s_add_u32 s0, s0, s17 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_cbranch_execz .LBB3_2 +; GISEL-NEXT: ; %bb.1: ; %call +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 +; GISEL-NEXT: s_getpc_b64 s[18:19] +; GISEL-NEXT: s_add_u32 s18, s18, func@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12 +; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 +; GISEL-NEXT: s_mov_b32 s12, s14 +; GISEL-NEXT: s_mov_b32 s13, s15 +; GISEL-NEXT: s_mov_b32 s14, s16 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL-NEXT: .LBB3_2: ; %end +; GISEL-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %cc = icmp eq i32 %id, 0 br i1 %cc, label %call, label %end diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll index 675acd0eedfc5..a52942cae1699 100644 --- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s ; Load argument depends on waitcnt which should be skipped. define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 { @@ -27,24 +28,43 @@ define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 { ; Memory waitcnt with no register dependence on the call define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 { -; GCN-LABEL: call_memory_no_dep: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_add_u32 s0, s0, s11 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_getpc_b64 s[8:9] -; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: global_store_dword v0, v0, s[6:7] -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GCN-NEXT: s_endpgm +; SDAG-LABEL: call_memory_no_dep: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11 +; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; SDAG-NEXT: s_add_u32 s0, s0, s11 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_getpc_b64 s[8:9] +; SDAG-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: global_store_dword v0, v0, s[6:7] +; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9] +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: call_memory_no_dep: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11 +; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; GISEL-NEXT: s_add_u32 s0, s0, s11 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_getpc_b64 s[8:9] +; GISEL-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: global_store_dword v0, v0, s[6:7] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: s_endpgm store i32 0, ptr addrspace(1) %ptr call void @func(i32 0) ret void @@ -52,46 +72,82 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 { ; Should not wait after the call before memory define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #0 { -; GCN-LABEL: call_no_wait_after_call: -; GCN: ; %bb.0: -; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0 -; GCN-NEXT: s_add_u32 s0, s0, s11 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_getpc_b64 s[8:9] -; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: v_mov_b32_e32 v40, 0 -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GCN-NEXT: global_store_dword v40, v40, s[34:35] -; GCN-NEXT: s_endpgm +; SDAG-LABEL: call_no_wait_after_call: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11 +; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; SDAG-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0 +; SDAG-NEXT: s_add_u32 s0, s0, s11 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_getpc_b64 s[8:9] +; SDAG-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: v_mov_b32_e32 v40, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9] +; SDAG-NEXT: global_store_dword v40, v40, s[34:35] +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: call_no_wait_after_call: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11 +; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; GISEL-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0 +; GISEL-NEXT: s_add_u32 s0, s0, s11 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: s_getpc_b64 s[8:9] +; GISEL-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: global_store_dword v0, v0, s[34:35] +; GISEL-NEXT: s_endpgm call void @func(i32 0) store i32 0, ptr addrspace(1) %ptr ret void } define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %ptr, i32) #0 { -; GCN-LABEL: call_no_wait_after_call_return_val: -; GCN: ; %bb.0: -; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0 -; GCN-NEXT: s_add_u32 s0, s0, s11 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_getpc_b64 s[8:9] -; GCN-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12 -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: v_mov_b32_e32 v40, 0 -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GCN-NEXT: global_store_dword v40, v0, s[34:35] -; GCN-NEXT: s_endpgm +; SDAG-LABEL: call_no_wait_after_call_return_val: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11 +; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; SDAG-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0 +; SDAG-NEXT: s_add_u32 s0, s0, s11 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_getpc_b64 s[8:9] +; SDAG-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4 +; SDAG-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12 +; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: v_mov_b32_e32 v40, 0 +; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9] +; SDAG-NEXT: global_store_dword v40, v0, s[34:35] +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: call_no_wait_after_call_return_val: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11 +; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; GISEL-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0 +; GISEL-NEXT: s_add_u32 s0, s0, s11 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: s_getpc_b64 s[8:9] +; GISEL-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4 +; GISEL-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: global_store_dword v1, v0, s[34:35] +; GISEL-NEXT: s_endpgm %rv = call i32 @func.return(i32 0) store i32 %rv, ptr addrspace(1) %ptr ret void @@ -99,22 +155,39 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) % ; Need to wait for the address dependency define amdgpu_kernel void @call_got_load(ptr addrspace(1) %ptr, i32) #0 { -; GCN-LABEL: call_got_load: -; GCN: ; %bb.0: -; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_add_u32 s0, s0, s11 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_getpc_b64 s[6:7] -; GCN-NEXT: s_add_u32 s6, s6, got.func@gotpcrel32@lo+4 -; GCN-NEXT: s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12 -; GCN-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GCN-NEXT: s_endpgm +; SDAG-LABEL: call_got_load: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11 +; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; SDAG-NEXT: s_add_u32 s0, s0, s11 +; SDAG-NEXT: s_addc_u32 s1, s1, 0 +; SDAG-NEXT: s_getpc_b64 s[6:7] +; SDAG-NEXT: s_add_u32 s6, s6, got.func@gotpcrel32@lo+4 +; SDAG-NEXT: s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12 +; SDAG-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 +; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_mov_b32 s32, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9] +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: call_got_load: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11 +; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; GISEL-NEXT: s_add_u32 s0, s0, s11 +; GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GISEL-NEXT: s_getpc_b64 s[6:7] +; GISEL-NEXT: s_add_u32 s6, s6, got.func@gotpcrel32@lo+4 +; GISEL-NEXT: s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12 +; GISEL-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] +; GISEL-NEXT: s_mov_b32 s32, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: s_endpgm call void @got.func(i32 0) ret void }