Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Splitting this into a new file as GISel does not support the bfloat type

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a comment to describe the situation here.

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s

; We've separated this file from call-args-inreg.ll since GlobalISel does not support the bfloat type.
; Ideally, we should merge the two files once that support lands.

declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0

define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s17, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_mov_b32 s32, s33
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s1, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s2
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: s_getpc_b64 s[2:3]
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void @external_void_func_bf16_inreg(bfloat inreg %arg)
ret void
}

define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s17, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_mov_b32 s32, s33
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s1, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s2
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: s_getpc_b64 s[2:3]
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
ret void
}

Loading