1
- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,FUNC %s
2
- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,FUNC %s
3
- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89,FUNC %s
4
- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
1
+ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
2
+ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
3
+ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
5
4
6
- declare i32 @llvm.r600.read.tidig .x () readnone
5
+ declare i32 @llvm.amdgcn.workitem.id .x () nounwind readnone speculatable
7
6
8
- ; FUNC -LABEL: {{^}}s_sub_i32:
7
+ ; GCN-LABEL: {{^}}s_sub_i32:
9
8
; GCN: s_load_dwordx2
10
9
; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}
11
10
; GCN: s_sub_i32 s{{[0-9]+}}, s[[A]], s[[B]]
@@ -15,7 +14,7 @@ define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
15
14
ret void
16
15
}
17
16
18
- ; FUNC -LABEL: {{^}}s_sub_imm_i32:
17
+ ; GCN-LABEL: {{^}}s_sub_imm_i32:
19
18
; GCN: s_load_dword [[A:s[0-9]+]]
20
19
; GCN: s_sub_i32 s{{[0-9]+}}, 0x4d2, [[A]]
21
20
define amdgpu_kernel void @s_sub_imm_i32 (i32 addrspace (1 )* %out , i32 %a ) {
@@ -24,9 +23,7 @@ define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) {
24
23
ret void
25
24
}
26
25
27
- ; FUNC-LABEL: {{^}}test_sub_i32:
28
- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
29
-
26
+ ; GCN-LABEL: {{^}}test_sub_i32:
30
27
; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
31
28
; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
32
29
define amdgpu_kernel void @test_sub_i32 (i32 addrspace (1 )* %out , i32 addrspace (1 )* %in ) {
@@ -38,9 +35,7 @@ define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)
38
35
ret void
39
36
}
40
37
41
- ; FUNC-LABEL: {{^}}test_sub_imm_i32:
42
- ; EG: SUB_INT
43
-
38
+ ; GCN-LABEL: {{^}}test_sub_imm_i32:
44
39
; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x7b, v{{[0-9]+}}
45
40
; GFX9: v_sub_u32_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
46
41
define amdgpu_kernel void @test_sub_imm_i32 (i32 addrspace (1 )* %out , i32 addrspace (1 )* %in ) {
@@ -50,10 +45,7 @@ define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspac
50
45
ret void
51
46
}
52
47
53
- ; FUNC-LABEL: {{^}}test_sub_v2i32:
54
- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
55
- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
56
-
48
+ ; GCN-LABEL: {{^}}test_sub_v2i32:
57
49
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
58
50
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
59
51
@@ -68,12 +60,7 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32
68
60
ret void
69
61
}
70
62
71
- ; FUNC-LABEL: {{^}}test_sub_v4i32:
72
- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
73
- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
74
- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
75
- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
76
-
63
+ ; GCN-LABEL: {{^}}test_sub_v4i32:
77
64
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
78
65
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
79
66
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
@@ -92,11 +79,11 @@ define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32
92
79
ret void
93
80
}
94
81
95
- ; FUNC -LABEL: {{^}}test_sub_i16:
82
+ ; GCN-LABEL: {{^}}test_sub_i16:
96
83
; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
97
84
; GFX89: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
98
85
define amdgpu_kernel void @test_sub_i16 (i16 addrspace (1 )* %out , i16 addrspace (1 )* %in ) {
99
- %tid = call i32 @llvm.r600.read.tidig .x ()
86
+ %tid = call i32 @llvm.amdgcn.workitem.id .x ()
100
87
%gep = getelementptr i16 , i16 addrspace (1 )* %in , i32 %tid
101
88
%b_ptr = getelementptr i16 , i16 addrspace (1 )* %gep , i32 1
102
89
%a = load volatile i16 , i16 addrspace (1 )* %gep
@@ -106,13 +93,13 @@ define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)
106
93
ret void
107
94
}
108
95
109
- ; FUNC -LABEL: {{^}}test_sub_v2i16:
96
+ ; GCN-LABEL: {{^}}test_sub_v2i16:
110
97
; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
111
98
; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}}
112
99
113
100
; GFX9: v_pk_sub_i16
114
101
define amdgpu_kernel void @test_sub_v2i16 (<2 x i16 > addrspace (1 )* %out , <2 x i16 > addrspace (1 )* %in ) {
115
- %tid = call i32 @llvm.r600.read.tidig .x ()
102
+ %tid = call i32 @llvm.amdgcn.workitem.id .x ()
116
103
%gep = getelementptr <2 x i16 >, <2 x i16 > addrspace (1 )* %in , i32 %tid
117
104
%b_ptr = getelementptr <2 x i16 >, <2 x i16 > addrspace (1 )* %gep , i16 1
118
105
%a = load <2 x i16 >, <2 x i16 > addrspace (1 )* %gep
@@ -122,7 +109,7 @@ define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
122
109
ret void
123
110
}
124
111
125
- ; FUNC -LABEL: {{^}}test_sub_v4i16:
112
+ ; GCN-LABEL: {{^}}test_sub_v4i16:
126
113
; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
127
114
; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}}
128
115
; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
@@ -131,7 +118,7 @@ define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
131
118
; GFX9: v_pk_sub_i16
132
119
; GFX9: v_pk_sub_i16
133
120
define amdgpu_kernel void @test_sub_v4i16 (<4 x i16 > addrspace (1 )* %out , <4 x i16 > addrspace (1 )* %in ) {
134
- %tid = call i32 @llvm.r600.read.tidig .x ()
121
+ %tid = call i32 @llvm.amdgcn.workitem.id .x ()
135
122
%gep = getelementptr <4 x i16 >, <4 x i16 > addrspace (1 )* %in , i32 %tid
136
123
%b_ptr = getelementptr <4 x i16 >, <4 x i16 > addrspace (1 )* %gep , i16 1
137
124
%a = load <4 x i16 >, <4 x i16 > addrspace (1 ) * %gep
@@ -141,22 +128,16 @@ define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16
141
128
ret void
142
129
}
143
130
144
- ; FUNC -LABEL: {{^}}s_sub_i64:
131
+ ; GCN-LABEL: {{^}}s_sub_i64:
145
132
; GCN: s_sub_u32
146
133
; GCN: s_subb_u32
147
-
148
- ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
149
- ; EG-DAG: SUB_INT {{[* ]*}}
150
- ; EG-DAG: SUBB_UINT
151
- ; EG-DAG: SUB_INT
152
- ; EG-DAG: SUB_INT {{[* ]*}}
153
134
define amdgpu_kernel void @s_sub_i64 (i64 addrspace (1 )* noalias %out , i64 %a , i64 %b ) nounwind {
154
135
%result = sub i64 %a , %b
155
136
store i64 %result , i64 addrspace (1 )* %out , align 8
156
137
ret void
157
138
}
158
139
159
- ; FUNC -LABEL: {{^}}v_sub_i64:
140
+ ; GCN-LABEL: {{^}}v_sub_i64:
160
141
; SI: v_sub_i32_e32
161
142
; SI: v_subb_u32_e32
162
143
@@ -165,14 +146,8 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64
165
146
166
147
; GFX9: v_sub_co_u32_e32
167
148
; GFX9: v_subb_co_u32_e32
168
-
169
- ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
170
- ; EG-DAG: SUB_INT {{[* ]*}}
171
- ; EG-DAG: SUBB_UINT
172
- ; EG-DAG: SUB_INT
173
- ; EG-DAG: SUB_INT {{[* ]*}}
174
149
define amdgpu_kernel void @v_sub_i64 (i64 addrspace (1 )* noalias %out , i64 addrspace (1 )* noalias %inA , i64 addrspace (1 )* noalias %inB ) nounwind {
175
- %tid = call i32 @llvm.r600.read.tidig .x () readnone
150
+ %tid = call i32 @llvm.amdgcn.workitem.id .x () readnone
176
151
%a_ptr = getelementptr i64 , i64 addrspace (1 )* %inA , i32 %tid
177
152
%b_ptr = getelementptr i64 , i64 addrspace (1 )* %inB , i32 %tid
178
153
%a = load i64 , i64 addrspace (1 )* %a_ptr
@@ -182,7 +157,7 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa
182
157
ret void
183
158
}
184
159
185
- ; FUNC -LABEL: {{^}}v_test_sub_v2i64:
160
+ ; GCN-LABEL: {{^}}v_test_sub_v2i64:
186
161
; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
187
162
; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc,
188
163
; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
@@ -198,7 +173,7 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa
198
173
; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc,
199
174
; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc,
200
175
define amdgpu_kernel void @v_test_sub_v2i64 (<2 x i64 > addrspace (1 )* %out , <2 x i64 > addrspace (1 )* noalias %inA , <2 x i64 > addrspace (1 )* noalias %inB ) {
201
- %tid = call i32 @llvm.r600.read.tidig .x () readnone
176
+ %tid = call i32 @llvm.amdgcn.workitem.id .x () readnone
202
177
%a_ptr = getelementptr <2 x i64 >, <2 x i64 > addrspace (1 )* %inA , i32 %tid
203
178
%b_ptr = getelementptr <2 x i64 >, <2 x i64 > addrspace (1 )* %inB , i32 %tid
204
179
%a = load <2 x i64 >, <2 x i64 > addrspace (1 )* %a_ptr
@@ -208,7 +183,7 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
208
183
ret void
209
184
}
210
185
211
- ; FUNC -LABEL: {{^}}v_test_sub_v4i64:
186
+ ; GCN-LABEL: {{^}}v_test_sub_v4i64:
212
187
; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
213
188
; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc,
214
189
; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
@@ -236,7 +211,7 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
236
211
; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc,
237
212
; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc,
238
213
define amdgpu_kernel void @v_test_sub_v4i64 (<4 x i64 > addrspace (1 )* %out , <4 x i64 > addrspace (1 )* noalias %inA , <4 x i64 > addrspace (1 )* noalias %inB ) {
239
- %tid = call i32 @llvm.r600.read.tidig .x () readnone
214
+ %tid = call i32 @llvm.amdgcn.workitem.id .x () readnone
240
215
%a_ptr = getelementptr <4 x i64 >, <4 x i64 > addrspace (1 )* %inA , i32 %tid
241
216
%b_ptr = getelementptr <4 x i64 >, <4 x i64 > addrspace (1 )* %inB , i32 %tid
242
217
%a = load <4 x i64 >, <4 x i64 > addrspace (1 )* %a_ptr
@@ -245,3 +220,22 @@ define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
245
220
store <4 x i64 > %result , <4 x i64 > addrspace (1 )* %out
246
221
ret void
247
222
}
223
+
224
+ ; Make sure the VOP3 form of sub is initially selected. Otherwise a pair
225
+ ; of copies from/to VCC would be necessary.
226
+
227
+ ; GCN-LABEL: {{^}}sub_select_vop3:
228
+ ; SI: v_subrev_i32_e64 v0, s[0:1], s0, v0
229
+ ; VI: v_subrev_u32_e64 v0, s[0:1], s0, v0
230
+ ; GFX9: v_subrev_u32_e32 v0, s0, v0
231
+
232
+ ; GCN: ; def vcc
233
+ ; GCN: ds_write_b32
234
+ ; GCN: ; use vcc
235
+ define amdgpu_ps void @sub_select_vop3 (i32 inreg %s , i32 %v ) {
236
+ %vcc = call i64 asm sideeffect "; def vcc" , "={vcc}" ()
237
+ %sub = sub i32 %v , %s
238
+ store i32 %sub , i32 addrspace (3 )* undef
239
+ call void asm sideeffect "; use vcc" , "{vcc}" (i64 %vcc )
240
+ ret void
241
+ }
0 commit comments