1
- ; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI-SAFE,GCN,FUNC %s
2
- ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN-NONAN,GCN,FUNC %s
1
+ ; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,GCN,FUNC %s
3
2
4
- ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN,FUNC %s
5
- ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN-NONAN,GCN,FUNC %s
3
+ ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=VI,GCN,FUNC %s
6
4
7
5
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope --check-prefixes=EG,FUNC %s
8
6
@@ -12,12 +10,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
12
10
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
13
11
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
14
12
15
- ; SI-SAFE : v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
13
+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
16
14
17
- ; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]]
18
- ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
19
-
20
- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
15
+ ; VI: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]]
16
+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
21
17
22
18
; EG: MAX
23
19
define amdgpu_kernel void @test_fmax_legacy_uge_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
@@ -34,18 +30,38 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr a
34
30
ret void
35
31
}
36
32
33
; Kernel test for the uge compare+select pattern with fast-math flags placed
; on the select itself: %a and %b are volatile float loads from %in[tid] and
; %in[tid + 1], the fcmp uge carries no flags, and the select carries nnan nsz.
; The checks under the GCN prefix expect the two dword loads followed by one
; v_max_f32_e32 on the loaded values; the check under the EG prefix expects MAX.
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_fast:
34
+ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
35
+ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
36
+
37
+ ; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
38
+
39
+ ; EG: MAX
40
+ define amdgpu_kernel void @test_fmax_legacy_uge_f32_fast (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
41
+ %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
42
+ %gep.0 = getelementptr float , ptr addrspace (1 ) %in , i32 %tid
43
+ %gep.1 = getelementptr float , ptr addrspace (1 ) %gep.0 , i32 1
44
+
45
+ %a = load volatile float , ptr addrspace (1 ) %gep.0 , align 4
46
+ %b = load volatile float , ptr addrspace (1 ) %gep.1 , align 4
47
+
48
+ %cmp = fcmp uge float %a , %b
49
+ %val = select nnan nsz i1 %cmp , float %a , float %b
50
+ store float %val , ptr addrspace (1 ) %out , align 4
51
+ ret void
52
+ }
53
+
37
54
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src:
38
55
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
39
56
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
40
57
; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]]
41
58
; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]]
42
59
43
- ; SI-SAFE : v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
60
+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
44
61
45
- ; VI-SAFE : v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
46
- ; VI-SAFE : v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]]
62
+ ; VI: v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
63
+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]]
47
64
48
- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
49
65
50
66
; EG: MAX
51
67
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
@@ -64,16 +80,40 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %o
64
80
ret void
65
81
}
66
82
83
; Kernel test for the uge compare+select pattern where both operands come
; from `fadd nnan` (%a + 1.0 and %b + 2.0) and the select carries nnan nsz;
; the fcmp itself has no fast-math flags.
; The checks under the GCN prefix expect the two dword loads, the two
; v_add_f32_e32 results (matched in either order), and then one v_max_f32_e32
; on the two add results; the check under the EG prefix expects MAX.
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src_fast:
84
+ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
85
+ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
86
+ ; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]]
87
+ ; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]]
88
+
89
+ ; GCN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
90
+
91
+ ; EG: MAX
92
+ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src_fast (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
93
+ %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
94
+ %gep.0 = getelementptr float , ptr addrspace (1 ) %in , i32 %tid
95
+ %gep.1 = getelementptr float , ptr addrspace (1 ) %gep.0 , i32 1
96
+
97
+ %a = load volatile float , ptr addrspace (1 ) %gep.0 , align 4
98
+ %b = load volatile float , ptr addrspace (1 ) %gep.1 , align 4
99
+ %a.nnan = fadd nnan float %a , 1 .0
100
+ %b.nnan = fadd nnan float %b , 2 .0
101
+
102
+ %cmp = fcmp uge float %a.nnan , %b.nnan
103
+ %val = select nnan nsz i1 %cmp , float %a.nnan , float %b.nnan
104
+ store float %val , ptr addrspace (1 ) %out , align 4
105
+ ret void
106
+ }
107
+
67
108
; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32:
68
109
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
69
110
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
70
111
71
- ; SI-SAFE : v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
112
+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
72
113
73
- ; VI-SAFE : v_cmp_ge_f32_e32 vcc, [[A]], [[B]]
74
- ; VI-SAFE : v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
114
+ ; VI: v_cmp_ge_f32_e32 vcc, [[A]], [[B]]
115
+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
75
116
76
- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
77
117
; EG: MAX
78
118
define amdgpu_kernel void @test_fmax_legacy_oge_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
79
119
%tid = call i32 @llvm.amdgcn.workitem.id.x () #1
@@ -89,17 +129,35 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr a
89
129
ret void
90
130
}
91
131
92
- ; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32 :
132
; Kernel test for the oge compare+select pattern with nnan nsz on the select:
; %a and %b are volatile float loads from %in[tid] and %in[tid + 1], and the
; fcmp oge carries no fast-math flags.
; The added check under the GCN prefix expects one v_max_f32_e32 on the two
; loaded values; the check under the EG prefix expects MAX.
; NOTE(review): the '-' prefixed check lines interleaved below look like
; removed-side diff lines from a neighbouring test rather than part of this
; kernel - confirm against the real file before applying.
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32_fast :
93
133
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
94
134
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
95
135
96
- ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
136
+ ; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
137
+ ; EG: MAX
138
+ define amdgpu_kernel void @test_fmax_legacy_oge_f32_fast (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
139
+ %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
140
+ %gep.0 = getelementptr float , ptr addrspace (1 ) %in , i32 %tid
141
+ %gep.1 = getelementptr float , ptr addrspace (1 ) %gep.0 , i32 1
142
+
143
+ %a = load volatile float , ptr addrspace (1 ) %gep.0 , align 4
144
+ %b = load volatile float , ptr addrspace (1 ) %gep.1 , align 4
97
145
98
- ; VI-SAFE: v_cmp_nle_f32_e32 vcc, [[A]], [[B]]
99
- ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
146
+ %cmp = fcmp oge float %a , %b
147
+ %val = select nnan nsz i1 %cmp , float %a , float %b
148
+ store float %val , ptr addrspace (1 ) %out , align 4
149
+ ret void
150
+ }
100
151
152
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32:
153
+ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
154
+ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
155
+
156
+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
157
+
158
+ ; VI: v_cmp_nle_f32_e32 vcc, [[A]], [[B]]
159
+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
101
160
102
- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
103
161
; EG: MAX
104
162
define amdgpu_kernel void @test_fmax_legacy_ugt_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
105
163
%tid = call i32 @llvm.amdgcn.workitem.id.x () #1
@@ -115,16 +173,35 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr a
115
173
ret void
116
174
}
117
175
176
; Kernel test for the ugt compare+select pattern with nnan nsz on the select:
; %a and %b are volatile float loads from %in[tid] and %in[tid + 1], and the
; fcmp ugt carries no fast-math flags.
; The checks under the GCN prefix expect the two dword loads followed by one
; v_max_f32_e32 on the loaded values; the check under the EG prefix expects MAX.
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32_fast:
177
+ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
178
+ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
179
+
180
+ ; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
181
+ ; EG: MAX
182
+ define amdgpu_kernel void @test_fmax_legacy_ugt_f32_fast (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
183
+ %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
184
+ %gep.0 = getelementptr float , ptr addrspace (1 ) %in , i32 %tid
185
+ %gep.1 = getelementptr float , ptr addrspace (1 ) %gep.0 , i32 1
186
+
187
+ %a = load volatile float , ptr addrspace (1 ) %gep.0 , align 4
188
+ %b = load volatile float , ptr addrspace (1 ) %gep.1 , align 4
189
+
190
+ %cmp = fcmp ugt float %a , %b
191
+ %val = select nnan nsz i1 %cmp , float %a , float %b
192
+ store float %val , ptr addrspace (1 ) %out , align 4
193
+ ret void
194
+ }
195
+
118
196
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32:
119
197
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
120
198
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
121
199
122
- ; SI-SAFE : v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
200
+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
123
201
124
- ; VI-SAFE : v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
125
- ; VI-SAFE : v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
202
+ ; VI: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
203
+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
126
204
127
- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
128
205
; EG: MAX
129
206
define amdgpu_kernel void @test_fmax_legacy_ogt_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
130
207
%tid = call i32 @llvm.amdgcn.workitem.id.x () #1
@@ -140,17 +217,35 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr a
140
217
ret void
141
218
}
142
219
143
- ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32 :
220
; Kernel test for the ogt compare+select pattern with nnan nsz on the select:
; %a and %b are volatile float loads from %in[tid] and %in[tid + 1], and the
; fcmp ogt carries no fast-math flags.
; The added check under the GCN prefix expects one v_max_f32_e32 on the two
; loaded values; the check under the EG prefix expects MAX.
; NOTE(review): the '-' prefixed check lines interleaved below look like
; removed-side diff lines from a neighbouring test rather than part of this
; kernel - confirm against the real file before applying.
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_fast :
144
221
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
145
222
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
146
223
147
- ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
224
+ ; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
225
+ ; EG: MAX
226
+ define amdgpu_kernel void @test_fmax_legacy_ogt_f32_fast (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
227
+ %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
228
+ %gep.0 = getelementptr float , ptr addrspace (1 ) %in , i32 %tid
229
+ %gep.1 = getelementptr float , ptr addrspace (1 ) %gep.0 , i32 1
230
+
231
+ %a = load volatile float , ptr addrspace (1 ) %gep.0 , align 4
232
+ %b = load volatile float , ptr addrspace (1 ) %gep.1 , align 4
148
233
149
- ; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
150
- ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
234
+ %cmp = fcmp ogt float %a , %b
235
+ %val = select nnan nsz i1 %cmp , float %a , float %b
236
+ store float %val , ptr addrspace (1 ) %out , align 4
237
+ ret void
238
+ }
151
239
240
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
241
+ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
242
+ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
243
+
244
+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
245
+
246
+ ; VI: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
247
+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
152
248
153
- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
154
249
; EG: MAX
155
250
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
156
251
%tid = call i32 @llvm.amdgcn.workitem.id.x () #1
@@ -166,23 +261,39 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr
166
261
ret void
167
262
}
168
263
264
; Kernel test for the ogt compare+select pattern on <1 x float> with nnan nsz
; on the select: %a and %b are volatile <1 x float> loads from consecutive
; elements of %in starting at index tid (no explicit alignment), and the
; fcmp ogt carries no fast-math flags.
; The checks under the GCN prefix expect two dword loads followed by one
; v_max_f32_e32 on the loaded values; the check under the EG prefix expects MAX.
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32_fast:
265
+ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
266
+ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
267
+
268
+ ; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
269
+ ; EG: MAX
270
+ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32_fast (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
271
+ %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
272
+ %gep.0 = getelementptr <1 x float >, ptr addrspace (1 ) %in , i32 %tid
273
+ %gep.1 = getelementptr <1 x float >, ptr addrspace (1 ) %gep.0 , i32 1
274
+
275
+ %a = load volatile <1 x float >, ptr addrspace (1 ) %gep.0
276
+ %b = load volatile <1 x float >, ptr addrspace (1 ) %gep.1
277
+
278
+ %cmp = fcmp ogt <1 x float > %a , %b
279
+ %val = select nnan nsz <1 x i1 > %cmp , <1 x float > %a , <1 x float > %b
280
+ store <1 x float > %val , ptr addrspace (1 ) %out
281
+ ret void
282
+ }
283
+
169
284
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
170
- ; SI-SAFE: v_max_legacy_f32_e32
171
- ; SI-SAFE: v_max_legacy_f32_e32
172
- ; SI-SAFE: v_max_legacy_f32_e32
173
-
174
- ; VI-SAFE: v_cmp_gt_f32_e32
175
- ; VI-SAFE: v_cndmask_b32_e32
176
- ; VI-SAFE: v_cmp_gt_f32_e32
177
- ; VI-SAFE: v_cndmask_b32_e32
178
- ; VI-SAFE: v_cmp_gt_f32_e32
179
- ; VI-SAFE: v_cndmask_b32_e32
180
- ; VI-SAFE-NOT: v_cmp
181
- ; VI-SAFE-NOT: v_cndmask
182
-
183
- ; GCN-NONAN: v_max_f32_e32
184
- ; GCN-NONAN: v_max_f32_e32
185
- ; GCN-NONAN: v_max_f32_e32
285
+ ; SI: v_max_legacy_f32_e32
286
+ ; SI: v_max_legacy_f32_e32
287
+ ; SI: v_max_legacy_f32_e32
288
+
289
+ ; VI: v_cmp_gt_f32_e32
290
+ ; VI: v_cndmask_b32_e32
291
+ ; VI: v_cmp_gt_f32_e32
292
+ ; VI: v_cndmask_b32_e32
293
+ ; VI: v_cmp_gt_f32_e32
294
+ ; VI: v_cndmask_b32_e32
295
+ ; VI-NOT: v_cmp
296
+ ; VI-NOT: v_cndmask
186
297
187
298
; GCN-NOT: v_max
188
299
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
@@ -199,6 +310,27 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr
199
310
ret void
200
311
}
201
312
313
; Kernel test for the ogt compare+select pattern on <3 x float> with nnan nsz
; on the select; the fcmp ogt carries no fast-math flags. Unlike the scalar
; variants in this file, the two loads here are not volatile.
; The checks under the GCN prefix expect three v_max_f32_e32 instructions and
; then no further text matching v_max. This block has no check under the EG
; prefix.
+ ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32_fast:
314
+
315
+ ; GCN: v_max_f32_e32
316
+ ; GCN: v_max_f32_e32
317
+ ; GCN: v_max_f32_e32
318
+
319
+ ; GCN-NOT: v_max
320
+ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32_fast (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
321
+ %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
322
+ %gep.0 = getelementptr <3 x float >, ptr addrspace (1 ) %in , i32 %tid
323
+ %gep.1 = getelementptr <3 x float >, ptr addrspace (1 ) %gep.0 , i32 1
324
+
325
+ %a = load <3 x float >, ptr addrspace (1 ) %gep.0
326
+ %b = load <3 x float >, ptr addrspace (1 ) %gep.1
327
+
328
+ %cmp = fcmp ogt <3 x float > %a , %b
329
+ %val = select nnan nsz <3 x i1 > %cmp , <3 x float > %a , <3 x float > %b
330
+ store <3 x float > %val , ptr addrspace (1 ) %out
331
+ ret void
332
+ }
333
+
202
334
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use:
203
335
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
204
336
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
0 commit comments