-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[NFC][AMDGPU] Convert amdpal.ll to autogenerated CHECK lines #141026
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Harrison Hao (harrisonGPU) Changes. Full diff: https://github.com/llvm/llvm-project/pull/141026.diff 1 File Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 2e47b0163aa8c..a97732b2f39a5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -1,9 +1,23 @@
-; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
-; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tonga | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1200 | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
-; PAL-NOT: .AMDGPU.config
-; PAL-LABEL: {{^}}simple:
define amdgpu_kernel void @simple(ptr addrspace(1) %out) {
+; CI-LABEL: simple:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: global_store_b32 v0, v0, s[0:1]
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: simple:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_wait_kmcnt 0x0
+; VI-NEXT: global_store_b32 v0, v0, s[0:1]
+; VI-NEXT: s_endpgm
entry:
store i32 0, ptr addrspace(1) %out
ret void
@@ -12,13 +26,28 @@ entry:
; Check code sequence for amdpal use of scratch for alloca. This is the case
; where the high half of the address comes from s_getpc.
-; PAL-LABEL: {{^}}scratch:
-; PAL: s_getpc_b64 s[[[GITPTR:[0-9]+]]:
-; PAL: s_mov_b32 s[[GITPTR]], s0
-; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) {
+; CI-LABEL: scratch:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_cmp_eq_u32 s2, 1
+; CI-NEXT: s_cselect_b32 s0, s1, s0
+; CI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: scratch_store_b32 off, v0, s3
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: scratch:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; VI-NEXT: s_wait_kmcnt 0x0
+; VI-NEXT: s_cmp_eq_u32 s2, 1
+; VI-NEXT: s_cselect_b32 s0, s1, s0
+; VI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: scratch_store_b32 off, v0, s3
+; VI-NEXT: s_endpgm
entry:
%v = alloca [2 x i32], addrspace(5)
store <2 x i32> %in, ptr addrspace(5) %v
@@ -35,13 +64,28 @@ entry:
; that the s_movk_i32 is into a reg that is one more than the following
; s_mov_b32.
-; PAL-LABEL: {{^}}scratch2:
-; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
-; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
-; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) #0 {
+; CI-LABEL: scratch2:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_cmp_eq_u32 s2, 1
+; CI-NEXT: s_cselect_b32 s0, s1, s0
+; CI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: scratch_store_b32 off, v0, s3
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: scratch2:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; VI-NEXT: s_wait_kmcnt 0x0
+; VI-NEXT: s_cmp_eq_u32 s2, 1
+; VI-NEXT: s_cselect_b32 s0, s1, s0
+; VI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: scratch_store_b32 off, v0, s3
+; VI-NEXT: s_endpgm
entry:
%v = alloca [2 x i32], addrspace(5)
store <2 x i32> %in, ptr addrspace(5) %v
@@ -56,14 +100,28 @@ entry:
; 0 in a graphics shader.
; Prior to GCN3 s_load_dword offsets are dwords, so the offset will be 0x4.
-; PAL-LABEL: {{^}}scratch2_cs:
-; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
-; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
-; CI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x4
-; VI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x10
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) #0 {
+; CI-LABEL: scratch2_cs:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v6
+; CI-NEXT: v_mov_b32_e32 v2, v5
+; CI-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; CI-NEXT: v_add3_u32 v0, 0, v0, 4
+; CI-NEXT: scratch_store_b96 off, v[2:4], off
+; CI-NEXT: scratch_load_b32 v0, v0, off
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: scratch2_cs:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: v_mov_b32_e32 v2, v5
+; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v6
+; VI-NEXT: scratch_store_b96 off, v[2:4], off
+; VI-NEXT: scratch_load_b32 v0, v0, off offset:4
+; VI-NEXT: s_wait_loadcnt 0x0
+; VI-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; VI-NEXT: s_endpgm
entry:
%v = alloca [3 x i32], addrspace(5)
%v1 = getelementptr [3 x i32], ptr addrspace(5) %v, i32 0, i32 1
@@ -79,15 +137,5 @@ entry:
attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" }
declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-
-
-; PAL: .amdgpu_pal_metadata
-; PAL-NEXT: ---
-; PAL-NEXT: amdpal.pipelines:
-; PAL-NEXT: - .hardware_stages:
-; PAL-NEXT: .cs:
-; PAL-NEXT: .entry_point: _amdgpu_cs_main
-; PAL-NEXT: .entry_point_symbol: scratch2_cs
-; PAL-NEXT: .scratch_memory_size: 0x10
-; PAL-NEXT: .sgpr_count: 0x
-; PAL-NEXT: .vgpr_count: 0x
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; PAL: {{.*}}
|
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test cannot be converted to generated checks. The generated checks only include the instructions in the function body, and none of the metadata. This test only really cares about the metadata
Okay, can I add these metadata checks after using update_llc_test_checks.py? I have a patch that will change this lit test, which is why I updated it. |
The next time someone runs it, the checks will be lost again. There isn't that much in this test, you can probably just manually do it? |
Okay thanks ! |
I think the mcpu is tahiti or tonga; maybe we can update it? I know these are old architectures. |
Add more run lines or a separate test |
Okay thanks. :-) |
| ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s | ||
| ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1200 | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was also sneaking in a target change, and the prefix names don't match
No description provided.