Skip to content

Commit c8231aa

Browse files
committed
[NFC][AMDGPU] Convert amdpal.ll to autogenerated CHECK lines
1 parent 5c37840 commit c8231aa

File tree

1 file changed

+83
-35
lines changed

1 file changed

+83
-35
lines changed

llvm/test/CodeGen/AMDGPU/amdpal.ll

Lines changed: 83 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,23 @@
1-
; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
2-
; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tonga | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
3+
; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1200 | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
3 4

4-
; PAL-NOT: .AMDGPU.config
5-
; PAL-LABEL: {{^}}simple:
6 5
define amdgpu_kernel void @simple(ptr addrspace(1) %out) {
6+
; CI-LABEL: simple:
7+
; CI: ; %bb.0: ; %entry
8+
; CI-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9+
; CI-NEXT: v_mov_b32_e32 v0, 0
10+
; CI-NEXT: s_waitcnt lgkmcnt(0)
11+
; CI-NEXT: global_store_b32 v0, v0, s[0:1]
12+
; CI-NEXT: s_endpgm
13+
;
14+
; VI-LABEL: simple:
15+
; VI: ; %bb.0: ; %entry
16+
; VI-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17+
; VI-NEXT: v_mov_b32_e32 v0, 0
18+
; VI-NEXT: s_wait_kmcnt 0x0
19+
; VI-NEXT: global_store_b32 v0, v0, s[0:1]
20+
; VI-NEXT: s_endpgm
7 21
entry:
8 22
store i32 0, ptr addrspace(1) %out
9 23
ret void
@@ -12,13 +26,28 @@ entry:
12 26
; Check code sequence for amdpal use of scratch for alloca. This is the case
13 27
; where the high half of the address comes from s_getpc.
14 28

15-
; PAL-LABEL: {{^}}scratch:
16-
; PAL: s_getpc_b64 s[[[GITPTR:[0-9]+]]:
17-
; PAL: s_mov_b32 s[[GITPTR]], s0
18-
; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
19-
; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
20-
21 29
define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) {
30+
; CI-LABEL: scratch:
31+
; CI: ; %bb.0: ; %entry
32+
; CI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
33+
; CI-NEXT: s_waitcnt lgkmcnt(0)
34+
; CI-NEXT: s_cmp_eq_u32 s2, 1
35+
; CI-NEXT: s_cselect_b32 s0, s1, s0
36+
; CI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
37+
; CI-NEXT: v_mov_b32_e32 v0, s0
38+
; CI-NEXT: scratch_store_b32 off, v0, s3
39+
; CI-NEXT: s_endpgm
40+
;
41+
; VI-LABEL: scratch:
42+
; VI: ; %bb.0: ; %entry
43+
; VI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
44+
; VI-NEXT: s_wait_kmcnt 0x0
45+
; VI-NEXT: s_cmp_eq_u32 s2, 1
46+
; VI-NEXT: s_cselect_b32 s0, s1, s0
47+
; VI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
48+
; VI-NEXT: v_mov_b32_e32 v0, s0
49+
; VI-NEXT: scratch_store_b32 off, v0, s3
50+
; VI-NEXT: s_endpgm
22 51
entry:
23 52
%v = alloca [2 x i32], addrspace(5)
24 53
store <2 x i32> %in, ptr addrspace(5) %v
@@ -35,13 +64,28 @@ entry:
35 64
; that the s_movk_i32 is into a reg that is one more than the following
36 65
; s_mov_b32.
37 66

38-
; PAL-LABEL: {{^}}scratch2:
39-
; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
40-
; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
41-
; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
42-
; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
43-
44 67
define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) #0 {
68+
; CI-LABEL: scratch2:
69+
; CI: ; %bb.0: ; %entry
70+
; CI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
71+
; CI-NEXT: s_waitcnt lgkmcnt(0)
72+
; CI-NEXT: s_cmp_eq_u32 s2, 1
73+
; CI-NEXT: s_cselect_b32 s0, s1, s0
74+
; CI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
75+
; CI-NEXT: v_mov_b32_e32 v0, s0
76+
; CI-NEXT: scratch_store_b32 off, v0, s3
77+
; CI-NEXT: s_endpgm
78+
;
79+
; VI-LABEL: scratch2:
80+
; VI: ; %bb.0: ; %entry
81+
; VI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
82+
; VI-NEXT: s_wait_kmcnt 0x0
83+
; VI-NEXT: s_cmp_eq_u32 s2, 1
84+
; VI-NEXT: s_cselect_b32 s0, s1, s0
85+
; VI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
86+
; VI-NEXT: v_mov_b32_e32 v0, s0
87+
; VI-NEXT: scratch_store_b32 off, v0, s3
88+
; VI-NEXT: s_endpgm
45 89
entry:
46 90
%v = alloca [2 x i32], addrspace(5)
47 91
store <2 x i32> %in, ptr addrspace(5) %v
@@ -56,14 +100,28 @@ entry:
56 100
; 0 in a graphics shader.
57 101
; Prior to GCN3 s_load_dword offsets are dwords, so the offset will be 0x4.
58 102

59-
; PAL-LABEL: {{^}}scratch2_cs:
60-
; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
61-
; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
62-
; CI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x4
63-
; VI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x10
64-
; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
65-
66 103
define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) #0 {
104+
; CI-LABEL: scratch2_cs:
105+
; CI: ; %bb.0: ; %entry
106+
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v6
107+
; CI-NEXT: v_mov_b32_e32 v2, v5
108+
; CI-NEXT: s_delay_alu instid0(VALU_DEP_2)
109+
; CI-NEXT: v_add3_u32 v0, 0, v0, 4
110+
; CI-NEXT: scratch_store_b96 off, v[2:4], off
111+
; CI-NEXT: scratch_load_b32 v0, v0, off
112+
; CI-NEXT: s_waitcnt vmcnt(0)
113+
; CI-NEXT: buffer_store_b32 v0, off, s[0:3], 0
114+
; CI-NEXT: s_endpgm
115+
;
116+
; VI-LABEL: scratch2_cs:
117+
; VI: ; %bb.0: ; %entry
118+
; VI-NEXT: v_mov_b32_e32 v2, v5
119+
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v6
120+
; VI-NEXT: scratch_store_b96 off, v[2:4], off
121+
; VI-NEXT: scratch_load_b32 v0, v0, off offset:4
122+
; VI-NEXT: s_wait_loadcnt 0x0
123+
; VI-NEXT: buffer_store_b32 v0, off, s[0:3], null
124+
; VI-NEXT: s_endpgm
67 125
entry:
68 126
%v = alloca [3 x i32], addrspace(5)
69 127
%v1 = getelementptr [3 x i32], ptr addrspace(5) %v, i32 0, i32 1
@@ -79,15 +137,5 @@ entry:
79 137
attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" }
80 138

81 139
declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
82-
83-
84-
; PAL: .amdgpu_pal_metadata
85-
; PAL-NEXT: ---
86-
; PAL-NEXT: amdpal.pipelines:
87-
; PAL-NEXT: - .hardware_stages:
88-
; PAL-NEXT: .cs:
89-
; PAL-NEXT: .entry_point: _amdgpu_cs_main
90-
; PAL-NEXT: .entry_point_symbol: scratch2_cs
91-
; PAL-NEXT: .scratch_memory_size: 0x10
92-
; PAL-NEXT: .sgpr_count: 0x
93-
; PAL-NEXT: .vgpr_count: 0x
140+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
141+
; PAL: {{.*}}

0 commit comments

Comments
 (0)