Skip to content

Commit 0e0b1fb

Browse files
committed
update tests
1 parent db9e8f3 commit 0e0b1fb

14 files changed

+3039
-1262
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
7474
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
7575
; GFX10: ; %bb.0:
7676
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77-
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
77+
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
78+
; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
7879
; GFX10-NEXT: s_setpc_b64 s[30:31]
7980
;
8081
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
@@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
8485
; GFX12-NEXT: s_wait_samplecnt 0x0
8586
; GFX12-NEXT: s_wait_bvhcnt 0x0
8687
; GFX12-NEXT: s_wait_kmcnt 0x0
87-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
88+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
89+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
90+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
8891
; GFX12-NEXT: s_setpc_b64 s[30:31]
8992
%fmul = fmul float %a, 2.0
9093
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -97,7 +100,8 @@ define float @test_fmed3_global_nnan(float %a) #3 {
97100
; GFX10-LABEL: test_fmed3_global_nnan:
98101
; GFX10: ; %bb.0:
99102
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100-
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
103+
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
104+
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
101105
; GFX10-NEXT: s_setpc_b64 s[30:31]
102106
;
103107
; GFX12-LABEL: test_fmed3_global_nnan:
@@ -107,7 +111,9 @@ define float @test_fmed3_global_nnan(float %a) #3 {
107111
; GFX12-NEXT: s_wait_samplecnt 0x0
108112
; GFX12-NEXT: s_wait_bvhcnt 0x0
109113
; GFX12-NEXT: s_wait_kmcnt 0x0
110-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
114+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
115+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
116+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
111117
; GFX12-NEXT: s_setpc_b64 s[30:31]
112118
%fmul = fmul float %a, 2.0
113119
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
@@ -134,7 +140,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
134140
; GFX12-NEXT: s_wait_samplecnt 0x0
135141
; GFX12-NEXT: s_wait_bvhcnt 0x0
136142
; GFX12-NEXT: s_wait_kmcnt 0x0
137-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
143+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
144+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
145+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
138146
; GFX12-NEXT: s_setpc_b64 s[30:31]
139147
%fmul = fmul float %a, 2.0
140148
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -172,7 +180,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
172180
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
173181
; GFX10: ; %bb.0:
174182
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175-
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
183+
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
184+
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
176185
; GFX10-NEXT: s_setpc_b64 s[30:31]
177186
;
178187
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
@@ -182,7 +191,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
182191
; GFX12-NEXT: s_wait_samplecnt 0x0
183192
; GFX12-NEXT: s_wait_bvhcnt 0x0
184193
; GFX12-NEXT: s_wait_kmcnt 0x0
185-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
194+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
195+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
196+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
186197
; GFX12-NEXT: s_setpc_b64 s[30:31]
187198
%fmul = fmul float %a, 2.0
188199
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)

llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
5151
; GFX10-LABEL: test_min_K1max_ValK0_f16:
5252
; GFX10: ; %bb.0:
5353
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54-
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
54+
; GFX10-NEXT: v_mul_f16_e32 v0, 2.0, v0
55+
; GFX10-NEXT: v_med3_f16 v0, v0, 0, 1.0
5556
; GFX10-NEXT: s_setpc_b64 s[30:31]
5657
;
5758
; GFX12-LABEL: test_min_K1max_ValK0_f16:
@@ -61,7 +62,9 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
6162
; GFX12-NEXT: s_wait_samplecnt 0x0
6263
; GFX12-NEXT: s_wait_bvhcnt 0x0
6364
; GFX12-NEXT: s_wait_kmcnt 0x0
64-
; GFX12-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
65+
; GFX12-NEXT: v_mul_f16_e32 v0, 2.0, v0
66+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
67+
; GFX12-NEXT: v_med3_num_f16 v0, v0, 0, 1.0
6568
; GFX12-NEXT: s_setpc_b64 s[30:31]
6669
%fmul = fmul half %a, 2.0
6770
%maxnum = call half @llvm.maxnum.f16(half %fmul, half 0.0)
@@ -95,7 +98,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
9598
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
9699
; GFX10: ; %bb.0:
97100
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98-
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
101+
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
102+
; GFX10-NEXT: v_pk_max_f16 v0, v0, 0
103+
; GFX10-NEXT: v_pk_min_f16 v0, v0, 1.0
99104
; GFX10-NEXT: s_setpc_b64 s[30:31]
100105
;
101106
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +110,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
105110
; GFX12-NEXT: s_wait_samplecnt 0x0
106111
; GFX12-NEXT: s_wait_bvhcnt 0x0
107112
; GFX12-NEXT: s_wait_kmcnt 0x0
108-
; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
113+
; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
114+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
115+
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, 0
116+
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, 1.0
109117
; GFX12-NEXT: s_setpc_b64 s[30:31]
110118
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
111119
%maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half poison>, <2 x half> %fmul)
@@ -231,7 +239,9 @@ define float @test_max_min_global_nnan(float %a) #3 {
231239
; GFX10-LABEL: test_max_min_global_nnan:
232240
; GFX10: ; %bb.0:
233241
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234-
; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp
242+
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
243+
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
244+
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
235245
; GFX10-NEXT: s_setpc_b64 s[30:31]
236246
;
237247
; GFX12-LABEL: test_max_min_global_nnan:
@@ -241,7 +251,9 @@ define float @test_max_min_global_nnan(float %a) #3 {
241251
; GFX12-NEXT: s_wait_samplecnt 0x0
242252
; GFX12-NEXT: s_wait_bvhcnt 0x0
243253
; GFX12-NEXT: s_wait_kmcnt 0x0
244-
; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
254+
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
255+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
256+
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
245257
; GFX12-NEXT: s_setpc_b64 s[30:31]
246258
%minnum = call float @llvm.minnum.f32(float %a, float 1.0)
247259
%fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
@@ -305,9 +317,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
305317
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
306318
; GFX10: ; %bb.0:
307319
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308-
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
309-
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
310-
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
320+
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
311321
; GFX10-NEXT: s_setpc_b64 s[30:31]
312322
;
313323
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -317,7 +327,9 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
317327
; GFX12-NEXT: s_wait_samplecnt 0x0
318328
; GFX12-NEXT: s_wait_bvhcnt 0x0
319329
; GFX12-NEXT: s_wait_kmcnt 0x0
320-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
330+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
331+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
332+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
321333
; GFX12-NEXT: s_setpc_b64 s[30:31]
322334
%fmul = fmul float %a, 2.0
323335
%maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
@@ -341,7 +353,9 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
341353
; GFX12-NEXT: s_wait_samplecnt 0x0
342354
; GFX12-NEXT: s_wait_bvhcnt 0x0
343355
; GFX12-NEXT: s_wait_kmcnt 0x0
344-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
356+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
357+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
358+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
345359
; GFX12-NEXT: s_setpc_b64 s[30:31]
346360
%fmul = fmul float %a, 2.0
347361
%maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
@@ -381,9 +395,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
381395
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
382396
; GFX10: ; %bb.0:
383397
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384-
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
385-
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
386-
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
398+
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
387399
; GFX10-NEXT: s_setpc_b64 s[30:31]
388400
;
389401
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:

llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -236,12 +236,14 @@ define float @test_min_max_global_nnan(float %a) #2 {
236236
; GFX10-LABEL: test_min_max_global_nnan:
237237
; GFX10: ; %bb.0:
238238
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239+
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
239240
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
240241
; GFX10-NEXT: s_setpc_b64 s[30:31]
241242
;
242243
; GFX8-LABEL: test_min_max_global_nnan:
243244
; GFX8: ; %bb.0:
244245
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246+
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
245247
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
246248
; GFX8-NEXT: s_setpc_b64 s[30:31]
247249
;
@@ -252,6 +254,8 @@ define float @test_min_max_global_nnan(float %a) #2 {
252254
; GFX12-NEXT: s_wait_samplecnt 0x0
253255
; GFX12-NEXT: s_wait_bvhcnt 0x0
254256
; GFX12-NEXT: s_wait_kmcnt 0x0
257+
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
258+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
255259
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
256260
; GFX12-NEXT: s_setpc_b64 s[30:31]
257261
%maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
@@ -263,13 +267,17 @@ define float @test_max_min_global_nnan(float %a) #2 {
263267
; GFX10-LABEL: test_max_min_global_nnan:
264268
; GFX10: ; %bb.0:
265269
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266-
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
270+
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
271+
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
272+
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
267273
; GFX10-NEXT: s_setpc_b64 s[30:31]
268274
;
269275
; GFX8-LABEL: test_max_min_global_nnan:
270276
; GFX8: ; %bb.0:
271277
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272-
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
278+
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
279+
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
280+
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
273281
; GFX8-NEXT: s_setpc_b64 s[30:31]
274282
;
275283
; GFX12-LABEL: test_max_min_global_nnan:
@@ -279,7 +287,9 @@ define float @test_max_min_global_nnan(float %a) #2 {
279287
; GFX12-NEXT: s_wait_samplecnt 0x0
280288
; GFX12-NEXT: s_wait_bvhcnt 0x0
281289
; GFX12-NEXT: s_wait_kmcnt 0x0
282-
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
290+
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
291+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
292+
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
283293
; GFX12-NEXT: s_setpc_b64 s[30:31]
284294
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
285295
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
@@ -456,15 +466,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
456466
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
457467
; GFX10: ; %bb.0:
458468
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459-
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
460-
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
469+
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
461470
; GFX10-NEXT: s_setpc_b64 s[30:31]
462471
;
463472
; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
464473
; GFX8: ; %bb.0:
465474
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466-
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
467-
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
475+
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
468476
; GFX8-NEXT: s_setpc_b64 s[30:31]
469477
;
470478
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -489,15 +497,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
489497
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
490498
; GFX10: ; %bb.0:
491499
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492-
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
493-
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
500+
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
494501
; GFX10-NEXT: s_setpc_b64 s[30:31]
495502
;
496503
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
497504
; GFX8: ; %bb.0:
498505
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499-
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
500-
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
506+
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
501507
; GFX8-NEXT: s_setpc_b64 s[30:31]
502508
;
503509
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:

0 commit comments

Comments
 (0)