1- ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
2- ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tonga | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+ ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
3+ ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1200 | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
34
4- ; PAL-NOT: .AMDGPU.config
5- ; PAL-LABEL: {{^}}simple:
65define amdgpu_kernel void @simple (ptr addrspace (1 ) %out ) {
6+ ; CI-LABEL: simple:
7+ ; CI: ; %bb.0: ; %entry
8+ ; CI-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
9+ ; CI-NEXT: v_mov_b32_e32 v0, 0
10+ ; CI-NEXT: s_waitcnt lgkmcnt(0)
11+ ; CI-NEXT: global_store_b32 v0, v0, s[0:1]
12+ ; CI-NEXT: s_endpgm
13+ ;
14+ ; VI-LABEL: simple:
15+ ; VI: ; %bb.0: ; %entry
16+ ; VI-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
17+ ; VI-NEXT: v_mov_b32_e32 v0, 0
18+ ; VI-NEXT: s_wait_kmcnt 0x0
19+ ; VI-NEXT: global_store_b32 v0, v0, s[0:1]
20+ ; VI-NEXT: s_endpgm
721entry:
822 store i32 0 , ptr addrspace (1 ) %out
923 ret void
@@ -12,13 +26,28 @@ entry:
1226; Check code sequence for amdpal use of scratch for alloca. This is the case
1327; where the high half of the address comes from s_getpc.
1428
15- ; PAL-LABEL: {{^}}scratch:
16- ; PAL: s_getpc_b64 s[[[GITPTR:[0-9]+]]:
17- ; PAL: s_mov_b32 s[[GITPTR]], s0
18- ; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
19- ; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
20-
2129define amdgpu_kernel void @scratch (<2 x i32 > %in , i32 %idx , ptr addrspace (5 ) %out ) {
30+ ; CI-LABEL: scratch:
31+ ; CI: ; %bb.0: ; %entry
32+ ; CI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
33+ ; CI-NEXT: s_waitcnt lgkmcnt(0)
34+ ; CI-NEXT: s_cmp_eq_u32 s2, 1
35+ ; CI-NEXT: s_cselect_b32 s0, s1, s0
36+ ; CI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
37+ ; CI-NEXT: v_mov_b32_e32 v0, s0
38+ ; CI-NEXT: scratch_store_b32 off, v0, s3
39+ ; CI-NEXT: s_endpgm
40+ ;
41+ ; VI-LABEL: scratch:
42+ ; VI: ; %bb.0: ; %entry
43+ ; VI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
44+ ; VI-NEXT: s_wait_kmcnt 0x0
45+ ; VI-NEXT: s_cmp_eq_u32 s2, 1
46+ ; VI-NEXT: s_cselect_b32 s0, s1, s0
47+ ; VI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
48+ ; VI-NEXT: v_mov_b32_e32 v0, s0
49+ ; VI-NEXT: scratch_store_b32 off, v0, s3
50+ ; VI-NEXT: s_endpgm
2251entry:
2352 %v = alloca [2 x i32 ], addrspace (5 )
2453 store <2 x i32 > %in , ptr addrspace (5 ) %v
@@ -35,13 +64,28 @@ entry:
3564; that the s_movk_i32 is into a reg that is one more than the following
3665; s_mov_b32.
3766
38- ; PAL-LABEL: {{^}}scratch2:
39- ; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
40- ; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
41- ; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
42- ; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
43-
4467define amdgpu_kernel void @scratch2 (<2 x i32 > %in , i32 %idx , ptr addrspace (5 ) %out ) #0 {
68+ ; CI-LABEL: scratch2:
69+ ; CI: ; %bb.0: ; %entry
70+ ; CI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
71+ ; CI-NEXT: s_waitcnt lgkmcnt(0)
72+ ; CI-NEXT: s_cmp_eq_u32 s2, 1
73+ ; CI-NEXT: s_cselect_b32 s0, s1, s0
74+ ; CI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
75+ ; CI-NEXT: v_mov_b32_e32 v0, s0
76+ ; CI-NEXT: scratch_store_b32 off, v0, s3
77+ ; CI-NEXT: s_endpgm
78+ ;
79+ ; VI-LABEL: scratch2:
80+ ; VI: ; %bb.0: ; %entry
81+ ; VI-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
82+ ; VI-NEXT: s_wait_kmcnt 0x0
83+ ; VI-NEXT: s_cmp_eq_u32 s2, 1
84+ ; VI-NEXT: s_cselect_b32 s0, s1, s0
85+ ; VI-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
86+ ; VI-NEXT: v_mov_b32_e32 v0, s0
87+ ; VI-NEXT: scratch_store_b32 off, v0, s3
88+ ; VI-NEXT: s_endpgm
4589entry:
4690 %v = alloca [2 x i32 ], addrspace (5 )
4791 store <2 x i32 > %in , ptr addrspace (5 ) %v
@@ -56,14 +100,28 @@ entry:
56100; 0 in a graphics shader.
57101; Prior to GCN3 s_load_dword offsets are dwords, so the offset will be 0x4.
58102
59- ; PAL-LABEL: {{^}}scratch2_cs:
60- ; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
61- ; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
62- ; CI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x4
63- ; VI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x10
64- ; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
65-
66103define amdgpu_cs void @scratch2_cs (i32 inreg , i32 inreg , i32 inreg , <3 x i32 > inreg , i32 inreg , <3 x i32 > %coord , <2 x i32 > %in , i32 %extra , i32 %idx ) #0 {
104+ ; CI-LABEL: scratch2_cs:
105+ ; CI: ; %bb.0: ; %entry
106+ ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v6
107+ ; CI-NEXT: v_mov_b32_e32 v2, v5
108+ ; CI-NEXT: s_delay_alu instid0(VALU_DEP_2)
109+ ; CI-NEXT: v_add3_u32 v0, 0, v0, 4
110+ ; CI-NEXT: scratch_store_b96 off, v[2:4], off
111+ ; CI-NEXT: scratch_load_b32 v0, v0, off
112+ ; CI-NEXT: s_waitcnt vmcnt(0)
113+ ; CI-NEXT: buffer_store_b32 v0, off, s[0:3], 0
114+ ; CI-NEXT: s_endpgm
115+ ;
116+ ; VI-LABEL: scratch2_cs:
117+ ; VI: ; %bb.0: ; %entry
118+ ; VI-NEXT: v_mov_b32_e32 v2, v5
119+ ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v6
120+ ; VI-NEXT: scratch_store_b96 off, v[2:4], off
121+ ; VI-NEXT: scratch_load_b32 v0, v0, off offset:4
122+ ; VI-NEXT: s_wait_loadcnt 0x0
123+ ; VI-NEXT: buffer_store_b32 v0, off, s[0:3], null
124+ ; VI-NEXT: s_endpgm
67125entry:
68126 %v = alloca [3 x i32 ], addrspace (5 )
69127 %v1 = getelementptr [3 x i32 ], ptr addrspace (5 ) %v , i32 0 , i32 1
@@ -79,15 +137,5 @@ entry:
79137attributes #0 = { nounwind "amdgpu-git-ptr-high" ="0x1234" }
80138
81139declare void @llvm.amdgcn.raw.ptr.buffer.store.f32 (float , ptr addrspace (8 ), i32 , i32 , i32 immarg)
82-
83-
84- ; PAL: .amdgpu_pal_metadata
85- ; PAL-NEXT: ---
86- ; PAL-NEXT: amdpal.pipelines:
87- ; PAL-NEXT: - .hardware_stages:
88- ; PAL-NEXT: .cs:
89- ; PAL-NEXT: .entry_point: _amdgpu_cs_main
90- ; PAL-NEXT: .entry_point_symbol: scratch2_cs
91- ; PAL-NEXT: .scratch_memory_size: 0x10
92- ; PAL-NEXT: .sgpr_count: 0x
93- ; PAL-NEXT: .vgpr_count: 0x
140+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
141+ ; PAL: {{.*}}
0 commit comments