@@ -51,7 +51,8 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
5151; GFX10-LABEL: test_min_K1max_ValK0_f16:
5252; GFX10: ; %bb.0:
5353; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54- ; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
54+ ; GFX10-NEXT: v_mul_f16_e32 v0, 2.0, v0
55+ ; GFX10-NEXT: v_med3_f16 v0, v0, 0, 1.0
5556; GFX10-NEXT: s_setpc_b64 s[30:31]
5657;
5758; GFX12-LABEL: test_min_K1max_ValK0_f16:
@@ -61,7 +62,9 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
6162; GFX12-NEXT: s_wait_samplecnt 0x0
6263; GFX12-NEXT: s_wait_bvhcnt 0x0
6364; GFX12-NEXT: s_wait_kmcnt 0x0
64- ; GFX12-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
65+ ; GFX12-NEXT: v_mul_f16_e32 v0, 2.0, v0
66+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
67+ ; GFX12-NEXT: v_med3_num_f16 v0, v0, 0, 1.0
6568; GFX12-NEXT: s_setpc_b64 s[30:31]
6669 %fmul = fmul half %a , 2 .0
6770 %maxnum = call half @llvm.maxnum.f16 (half %fmul , half 0 .0 )
@@ -95,7 +98,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
9598; GFX10-LABEL: test_min_max_splat_padded_with_undef:
9699; GFX10: ; %bb.0:
97100; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98- ; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
101+ ; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
102+ ; GFX10-NEXT: v_pk_max_f16 v0, v0, 0
103+ ; GFX10-NEXT: v_pk_min_f16 v0, v0, 1.0
99104; GFX10-NEXT: s_setpc_b64 s[30:31]
100105;
101106; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +110,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
105110; GFX12-NEXT: s_wait_samplecnt 0x0
106111; GFX12-NEXT: s_wait_bvhcnt 0x0
107112; GFX12-NEXT: s_wait_kmcnt 0x0
108- ; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
113+ ; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
114+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
115+ ; GFX12-NEXT: v_pk_max_num_f16 v0, v0, 0
116+ ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, 1.0
109117; GFX12-NEXT: s_setpc_b64 s[30:31]
110118 %fmul = fmul <2 x half > %a , <half 2 .0 , half 2 .0 >
111119 %maxnum = call <2 x half > @llvm.maxnum.v2f16 (<2 x half > <half 0 .0 , half poison>, <2 x half > %fmul )
@@ -231,7 +239,9 @@ define float @test_max_min_global_nnan(float %a) #3 {
231239; GFX10-LABEL: test_max_min_global_nnan:
232240; GFX10: ; %bb.0:
233241; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234- ; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp
242+ ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
243+ ; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
244+ ; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
235245; GFX10-NEXT: s_setpc_b64 s[30:31]
236246;
237247; GFX12-LABEL: test_max_min_global_nnan:
@@ -241,7 +251,9 @@ define float @test_max_min_global_nnan(float %a) #3 {
241251; GFX12-NEXT: s_wait_samplecnt 0x0
242252; GFX12-NEXT: s_wait_bvhcnt 0x0
243253; GFX12-NEXT: s_wait_kmcnt 0x0
244- ; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
254+ ; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
255+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
256+ ; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
245257; GFX12-NEXT: s_setpc_b64 s[30:31]
246258 %minnum = call float @llvm.minnum.f32 (float %a , float 1 .0 )
247259 %fmed = call float @llvm.maxnum.f32 (float %minnum , float 0 .0 )
@@ -305,9 +317,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
305317; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
306318; GFX10: ; %bb.0:
307319; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308- ; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
309- ; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
310- ; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
320+ ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
311321; GFX10-NEXT: s_setpc_b64 s[30:31]
312322;
313323; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -317,7 +327,9 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
317327; GFX12-NEXT: s_wait_samplecnt 0x0
318328; GFX12-NEXT: s_wait_bvhcnt 0x0
319329; GFX12-NEXT: s_wait_kmcnt 0x0
320- ; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
330+ ; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
331+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
332+ ; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
321333; GFX12-NEXT: s_setpc_b64 s[30:31]
322334 %fmul = fmul float %a , 2 .0
323335 %maxnum = call float @llvm.maxnum.f32 (float %fmul , float 0 .0 )
@@ -341,7 +353,9 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
341353; GFX12-NEXT: s_wait_samplecnt 0x0
342354; GFX12-NEXT: s_wait_bvhcnt 0x0
343355; GFX12-NEXT: s_wait_kmcnt 0x0
344- ; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
356+ ; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
357+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
358+ ; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
345359; GFX12-NEXT: s_setpc_b64 s[30:31]
346360 %fmul = fmul float %a , 2 .0
347361 %maxnum = call float @llvm.maxnum.f32 (float %fmul , float 0 .0 )
@@ -381,9 +395,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
381395; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
382396; GFX10: ; %bb.0:
383397; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384- ; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
385- ; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
386- ; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
398+ ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
387399; GFX10-NEXT: s_setpc_b64 s[30:31]
388400;
389401; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
0 commit comments