-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[NFC][AMDGPU] Autogenerating test cases #124507
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Aaditya (easyonaadit)
Changes
Patch is 180.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124507.diff
3 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 6fb071dd42d2ff..3241a76d46a1e0 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -1,123 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FLATSCR %s
-; GCN-LABEL: {{^}}callee_no_stack:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @callee_no_stack() #0 {
+; GCN-LABEL: callee_no_stack:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
ret void
}
-; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
-; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
-; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
-; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_no_fp_elim_all() #1 {
+; MUBUF-LABEL: callee_no_stack_no_fp_elim_all:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_mov_b32 s4, s33
+; MUBUF-NEXT: s_mov_b32 s33, s32
+; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_no_stack_no_fp_elim_all:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_mov_b32 s0, s33
+; FLATSCR-NEXT: s_mov_b32 s33, s32
+; FLATSCR-NEXT: s_mov_b32 s33, s0
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
ret void
}
-; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
+; GCN-LABEL: callee_no_stack_no_fp_elim_nonleaf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
ret void
}
-; GCN-LABEL: {{^}}callee_with_stack:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
-; FLATSCR-NEXT: scratch_store_dword off, v0, s32
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @callee_with_stack() #0 {
+; MUBUF-LABEL: callee_with_stack:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: v_mov_b32_e32 v0, 0
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_with_stack:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
ret void
}
; Can use free call clobbered register to preserve original FP value.
-
-; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
-; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
-; GCN-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT: s_addk_i32 s32, 0x200
-; FLATSCR-NEXT: s_add_i32 s32, s32, 8
-; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33{{$}}
-; FLATSCR-NEXT: scratch_store_dword off, v0, s33{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s32, s33
-; FLATSCR-NEXT: s_mov_b32 s32, s33
-; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
-; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_all() #1 {
+; MUBUF-LABEL: callee_with_stack_no_fp_elim_all:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_mov_b32 s4, s33
+; MUBUF-NEXT: s_mov_b32 s33, s32
+; MUBUF-NEXT: s_addk_i32 s32, 0x200
+; MUBUF-NEXT: v_mov_b32_e32 v0, 0
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_mov_b32 s32, s33
+; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_with_stack_no_fp_elim_all:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_mov_b32 s0, s33
+; FLATSCR-NEXT: s_mov_b32 s33, s32
+; FLATSCR-NEXT: s_add_i32 s32, s32, 8
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT: scratch_store_dword off, v0, s33
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_mov_b32 s32, s33
+; FLATSCR-NEXT: s_mov_b32 s33, s0
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
ret void
}
-; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
-; FLATSCR-NEXT: scratch_store_dword off, v0, s32{{$}}
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
+; MUBUF-LABEL: callee_with_stack_no_fp_elim_non_leaf:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: v_mov_b32_e32 v0, 0
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_with_stack_no_fp_elim_non_leaf:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
ret void
}
-; GCN-LABEL: {{^}}callee_with_stack_and_call:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], 2
-; MUBUF-DAG: s_addk_i32 s32, 0x400{{$}}
-; FLATSCR-DAG: s_add_i32 s32, s32, 16{{$}}
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30,
-; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
-
-; MUBUF-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
-; FLATSCR-DAG: scratch_store_dword off, [[ZERO]], s33{{$}}
-
-; GCN: s_swappc_b64
-
-; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]]
-; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]]
-
-; MUBUF: s_mov_b32 s32, s33{{$}}
-; FLATSCR: s_mov_b32 s32, s33{{$}}
-; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], 2
-; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
-; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
-; GCN-NEXT: s_waitcnt vmcnt(0)
-
-; GCN-NEXT: s_setpc_b64 s[30:31]
define void @callee_with_stack_and_call() #0 {
+; MUBUF-LABEL: callee_with_stack_and_call:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_mov_b32 s16, s33
+; MUBUF-NEXT: s_mov_b32 s33, s32
+; MUBUF-NEXT: s_or_saveexec_b64 s[18:19], -1
+; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[18:19]
+; MUBUF-NEXT: v_writelane_b32 v40, s16, 2
+; MUBUF-NEXT: s_addk_i32 s32, 0x400
+; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT: v_mov_b32_e32 v0, 0
+; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_getpc_b64 s[16:17]
+; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT: s_mov_b32 s32, s33
+; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_with_stack_and_call:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_mov_b32 s0, s33
+; FLATSCR-NEXT: s_mov_b32 s33, s32
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2
+; FLATSCR-NEXT: s_add_i32 s32, s32, 16
+; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT: scratch_store_dword off, v0, s33
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT: s_mov_b32 s32, s33
+; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: s_mov_b32 s33, s0
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
call void @external_void_func_void()
@@ -130,36 +184,60 @@ define void @callee_with_stack_and_call() #0 {
; There is stack usage only because of the need to evict a VGPR for
; spilling CSR SGPRs.
-; GCN-LABEL: {{^}}callee_no_stack_with_call:
-; GCN: s_waitcnt
-; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; MUBUF-DAG: s_addk_i32 s32, 0x400
-; FLATSCR-DAG: s_add_i32 s32, s32, 16
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], [[FP_SPILL_LANE:[0-9]+]]
-
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
-; GCN: s_swappc_b64
-
-; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0
-; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1
-
-; MUBUF: s_mov_b32 s32, s33
-; FLATSCR: s_mov_b32 s32, s33
-; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], [[FP_SPILL_LANE]]
-; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
-; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[30:31]
define void @callee_no_stack_with_call() #0 {
+; MUBUF-LABEL: callee_no_stack_with_call:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_mov_b32 s16, s33
+; MUBUF-NEXT: s_mov_b32 s33, s32
+; MUBUF-NEXT: s_or_saveexec_b64 s[18:19], -1
+; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[18:19]
+; MUBUF-NEXT: v_writelane_b32 v40, s16, 2
+; MUBUF-NEXT: s_addk_i32 s32, 0x400
+; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT: s_getpc_b64 s[16:17]
+; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4
+; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
+; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
+; MUBUF-NEXT: s_mov_b32 s32, s33
+; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: callee_no_stack_with_call:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_mov_b32 s0, s33
+; FLATSCR-NEXT: s_mov_b32 s33, s32
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2
+; FLATSCR-NEXT: s_add_i32 s32, s32, 16
+; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
+; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
+; FLATSCR-NEXT: s_mov_b32 s32, s33
+; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: s_mov_b32 s33, s0
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[30:31]
call void @external_void_func_void()
ret void
}
@@ -168,26 +246,306 @@ declare hidden void @external_void_func_void() #0
; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and
; restored. No FP is required.
-;
-; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
-; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN: v_writelane_b32 [[CSR_VGPR]], s
-; GCN: v_writelane_b32 [[CSR_VGPR]], s
-
-; GCN: ;;#ASMSTART
-; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]
-; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]
-
-; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
+; MUBUF-LABEL: callee_func_sgpr_spill_no_calls:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: v_writelane_b32 v40, s36, 0
+; MUBUF-NEXT: v_writelane_b32 v40, s37, 1
+; MUBUF-NEXT: v_writelane_b32 v40, s38, 2
+; MUBUF-NEXT: v_writelane_b32 v40, s39, 3
+; MUBUF-NEXT: v_writelane_b32 v40, s40, 4
+; MUBUF-NEXT: v_writelane_b32 v40, s41, 5
+; MUBUF-NEXT: v_writelane_b32 v40, s42, 6
+; MUBUF-NEXT: v_writelane_b32 v40, s43, 7
+; MUBUF-NEXT: v_writelane_b32 v40, s44, 8
+; MUBUF-NEXT: v_writelane_b32 v40, s45, 9
+; MUBUF-NEXT: v_writelane_b32 v40, s46, 10
+; MUBUF-NEXT: v_writelane_b32 v40, s47, 11
+; MUBUF-NEXT: v_writelane_b32 v40, s48, 12
+; MUBUF-NEXT: v_writelane_b32 v40, s49, 13
+; MUBUF-NEXT: v_writelane_b32 v40, s50, 14
+; MUBUF-NEXT: v_writelane_b32 v40, s51, 15
+; MUBUF-NEXT: v_writelane_b32 v40, s52, 16
+; MUBUF-NEXT: v_writelane_b32 v40, s53, 17
+; MUBUF-NEXT: v_writelane_b32 v40, s54, 18
+; MUBUF-NEXT: v_writelane_b32 v40, s55, 19
+; MUBUF-NEXT: v_writelane_b32 v40, s56, 20
+; MUBUF-NEXT: v_writelane_b32 v40, s57, 21
+; MUBUF-NEXT: v_writelane_b32 v40, s58, 22
+; MUBUF-NEXT: v_writelane_b32 v40, s59, 23
+; MUBUF-NEXT: v_writelane_b32 v40, s60, 24
+; MUBUF-NEXT: v_writelane_b32 v40, s61, 25
+; MUBUF-NEXT: v_writelane_b32 v40, s62, 26
+; MUBUF-NEXT: v_writelane_b32 v40, s63, 27
+; MUBUF-NEXT: v_writelane_b32 v40, s64, 28
+; MUBUF-NEXT: v_writelane_b32 v40, s65, 29
+; MUBUF-NEXT: v_writelane_b32 v40, s66, 30
+; MUBUF-NEXT: v_writelane_b32 v40, s67, 31
+; MUBUF-NEXT: v_writelane_b32 v40, s68, 32
+; MUBUF-NEXT: v_writelane_b32 v40, s69, 33
+; MUBUF-NEXT: v_writelane_b32 v40, s70, 34
+; MUBUF-NEXT: v_writelane_b32 v40, s71, 35
+; MUBUF-NEXT: v_writelane_b32 v40, s72, 36
+; MUBUF-NEXT: v_writelane_b32 v40, s73, 37
+; MUBUF-NEXT: v_writelane_b32 v40, s74, 38
+; MUBUF-NEXT: v_writelane_b32 v40, s75, 39
+; MUBUF-NEXT: v_writelane_b32 v40, s76, 40
+; MUBUF-NEXT: v_writelane_b32 v40, s77, 41
+; MUBUF-NEXT: v_writelane_b32 v40, s78, 42
+; MUBUF-NEXT: v_writelane_b32 v40, s79, 43
+; MUBUF-NEXT: v_writelane_b32 v40, s80, 44
+; MUBUF-NEXT: v_writelane_b32 v40, s81, 45
+; MUBUF-NEXT: v_writelane_b32 v40, s82, 46
+; MUBUF-NEXT: v_writelane_b32 v40, s83, 47
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; def s[68:83]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; def s[52:67]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; def s[36:51]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; def s[4:19]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; def s[20:27]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; def s[28:29]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; use s[68:83]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; use s[52:67]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; use s[36:51]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; use s[20:27]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; use s[28:29]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: ;;#ASMSTART
+; MUBUF-NEXT: ; use s[4:19]
+; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: v_readlane_b32 s83, v40, 47
+; MUBUF-NEXT: v_readlane_b32 s82, v40, 46
+; MUBUF-NEXT: v_readlane_b32 s81, v40, 45
+; MUBUF-NEXT: v_readlane_b32 s80, v40, 44
+; MUBUF-NEXT: v_readlane_b32 s79, v40, 43
+; MUBUF-NEXT: v_readlane_b32 s78, v40, 42
+; MUBUF-NEXT: v_readlane_b32 s77, v40, 41
+; ...
[truncated]
|
easyonaadit
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I updated the RUN lines in the tests; now it's checking all the targets.
|
I have modified the RUN lines in the tests; now it's checking all targets. |
No description provided.