1- ; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI-SAFE,GCN,FUNC %s
2- ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN-NONAN,GCN,FUNC %s
1+ ; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,GCN,FUNC %s
32
4- ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN,FUNC %s
5- ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN-NONAN,GCN,FUNC %s
3+ ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=VI,GCN,FUNC %s
64
75; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope --check-prefixes=EG,FUNC %s
86
@@ -12,12 +10,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
1210; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1311; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1412
15- ; SI-SAFE : v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
13+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
1614
17- ; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]]
18- ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
19-
20- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
15+ ; VI: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]]
16+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
2117
2218; EG: MAX
2319define amdgpu_kernel void @test_fmax_legacy_uge_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
@@ -34,18 +30,38 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr a
3430 ret void
3531}
3632
; Fast-math variant of test_fmax_legacy_uge_f32: the select itself carries the
; nnan and nsz flags, and every GCN run line is expected to emit a plain
; v_max_f32 instead of the legacy max (SI) or the compare + cndmask pair (VI)
; checked for the unflagged version.
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_fast:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  ; Both loads are volatile; the checks above expect two separate dword loads.
  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  ; Unordered >= compare feeding a select that is flagged nnan nsz.
  %cmp = fcmp uge float %a, %b
  %val = select nnan nsz i1 %cmp, float %a, float %b
  store float %val, ptr addrspace(1) %out, align 4
  ret void
}
53+
3754; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src:
3855; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
3956; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
4057; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]]
4158; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]]
4259
43- ; SI-SAFE : v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
60+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
4461
45- ; VI-SAFE : v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
46- ; VI-SAFE : v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]]
62+ ; VI: v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
63+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]]
4764
48- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
4965
5066; EG: MAX
5167define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
@@ -64,16 +80,40 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %o
6480 ret void
6581}
6682
; Fast-math variant of test_fmax_legacy_uge_f32_nnan_src: both select inputs
; come from nnan-flagged fadds and the select carries nnan nsz. All GCN run
; lines are expected to emit a plain v_max_f32 on the two add results.
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src_fast:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]]
; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]]

; GCN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  ; Both loads are volatile; the checks above expect two separate dword loads.
  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
  ; The distinct addends (1.0 / 2.0) let the checks tell the two adds apart.
  %a.nnan = fadd nnan float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0

  %cmp = fcmp uge float %a.nnan, %b.nnan
  %val = select nnan nsz i1 %cmp, float %a.nnan, float %b.nnan
  store float %val, ptr addrspace(1) %out, align 4
  ret void
}
107+
67108; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32:
68109; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
69110; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
70111
71- ; SI-SAFE : v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
112+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
72113
73- ; VI-SAFE : v_cmp_ge_f32_e32 vcc, [[A]], [[B]]
74- ; VI-SAFE : v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
114+ ; VI: v_cmp_ge_f32_e32 vcc, [[A]], [[B]]
115+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
75116
76- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
77117; EG: MAX
78118define amdgpu_kernel void @test_fmax_legacy_oge_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
79119 %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
@@ -89,17 +129,35 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr a
89129 ret void
90130}
91131
92- ; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32 :
; Fast-math variant of test_fmax_legacy_oge_f32: ordered >= compare feeding a
; select flagged nnan nsz. All GCN run lines are expected to emit a plain
; v_max_f32.
; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32_fast:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_oge_f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  ; Both loads are volatile; the checks above expect two separate dword loads.
  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %cmp = fcmp oge float %a, %b
  %val = select nnan nsz i1 %cmp, float %a, float %b
  store float %val, ptr addrspace(1) %out, align 4
  ret void
}
100151
152+ ; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32:
153+ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
154+ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
155+
156+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
157+
158+ ; VI: v_cmp_nle_f32_e32 vcc, [[A]], [[B]]
159+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
101160
102- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
103161; EG: MAX
104162define amdgpu_kernel void @test_fmax_legacy_ugt_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
105163 %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
@@ -115,16 +173,35 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr a
115173 ret void
116174}
117175
; Fast-math variant of test_fmax_legacy_ugt_f32: unordered > compare feeding a
; select flagged nnan nsz. All GCN run lines are expected to emit a plain
; v_max_f32.
; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32_fast:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ugt_f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  ; Both loads are volatile; the checks above expect two separate dword loads.
  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %cmp = fcmp ugt float %a, %b
  %val = select nnan nsz i1 %cmp, float %a, float %b
  store float %val, ptr addrspace(1) %out, align 4
  ret void
}
195+
118196; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32:
119197; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
120198; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
121199
122- ; SI-SAFE : v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
200+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
123201
124- ; VI-SAFE : v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
125- ; VI-SAFE : v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
202+ ; VI: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
203+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
126204
127- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
128205; EG: MAX
129206define amdgpu_kernel void @test_fmax_legacy_ogt_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
130207 %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
@@ -140,17 +217,35 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr a
140217 ret void
141218}
142219
143- ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32 :
; Fast-math variant of test_fmax_legacy_ogt_f32: ordered > compare feeding a
; select flagged nnan nsz. All GCN run lines are expected to emit a plain
; v_max_f32.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_fast:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  ; Both loads are volatile; the checks above expect two separate dword loads.
  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %cmp = fcmp ogt float %a, %b
  %val = select nnan nsz i1 %cmp, float %a, float %b
  store float %val, ptr addrspace(1) %out, align 4
  ret void
}
151239
240+ ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
241+ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
242+ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
243+
244+ ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
245+
246+ ; VI: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
247+ ; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
152248
153- ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
154249; EG: MAX
155250define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
156251 %tid = call i32 @llvm.amdgcn.workitem.id.x () #1
@@ -166,23 +261,39 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr
166261 ret void
167262}
168263
; Fast-math variant of test_fmax_legacy_ogt_v1f32: same ordered > pattern on a
; single-element vector, with the select flagged nnan nsz. All GCN run lines
; are expected to emit one plain v_max_f32.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32_fast:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr <1 x float>, ptr addrspace(1) %gep.0, i32 1

  ; Both loads are volatile; the checks above expect two separate dword loads.
  %a = load volatile <1 x float>, ptr addrspace(1) %gep.0
  %b = load volatile <1 x float>, ptr addrspace(1) %gep.1

  %cmp = fcmp ogt <1 x float> %a, %b
  %val = select nnan nsz <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
  store <1 x float> %val, ptr addrspace(1) %out
  ret void
}
283+
169284; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
170- ; SI-SAFE: v_max_legacy_f32_e32
171- ; SI-SAFE: v_max_legacy_f32_e32
172- ; SI-SAFE: v_max_legacy_f32_e32
173-
174- ; VI-SAFE: v_cmp_gt_f32_e32
175- ; VI-SAFE: v_cndmask_b32_e32
176- ; VI-SAFE: v_cmp_gt_f32_e32
177- ; VI-SAFE: v_cndmask_b32_e32
178- ; VI-SAFE: v_cmp_gt_f32_e32
179- ; VI-SAFE: v_cndmask_b32_e32
180- ; VI-SAFE-NOT: v_cmp
181- ; VI-SAFE-NOT: v_cndmask
182-
183- ; GCN-NONAN: v_max_f32_e32
184- ; GCN-NONAN: v_max_f32_e32
185- ; GCN-NONAN: v_max_f32_e32
285+ ; SI: v_max_legacy_f32_e32
286+ ; SI: v_max_legacy_f32_e32
287+ ; SI: v_max_legacy_f32_e32
288+
289+ ; VI: v_cmp_gt_f32_e32
290+ ; VI: v_cndmask_b32_e32
291+ ; VI: v_cmp_gt_f32_e32
292+ ; VI: v_cndmask_b32_e32
293+ ; VI: v_cmp_gt_f32_e32
294+ ; VI: v_cndmask_b32_e32
295+ ; VI-NOT: v_cmp
296+ ; VI-NOT: v_cndmask
186297
187298; GCN-NOT: v_max
188299define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
@@ -199,6 +310,27 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr
199310 ret void
200311}
201312
; Fast-math variant of test_fmax_legacy_ogt_v3f32: ordered > compare on a
; three-element vector with the select flagged nnan nsz. The checks require
; exactly three plain v_max_f32 instructions and no further v_max of any kind.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32_fast:

; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32

; GCN-NOT: v_max
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr <3 x float>, ptr addrspace(1) %gep.0, i32 1

  ; Unlike the scalar tests, these loads are not volatile and are not checked.
  %a = load <3 x float>, ptr addrspace(1) %gep.0
  %b = load <3 x float>, ptr addrspace(1) %gep.1

  %cmp = fcmp ogt <3 x float> %a, %b
  %val = select nnan nsz <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
  store <3 x float> %val, ptr addrspace(1) %out
  ret void
}
333+
202334; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use:
203335; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
204336; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
0 commit comments