llvm
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll‎
Lines changed: 18 additions & 7 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll‎
Lines changed: 18 additions & 7 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll‎
Lines changed: 26 additions & 14 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll‎
Lines changed: 26 additions & 14 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll‎
Lines changed: 17 additions & 11 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll‎
Lines changed: 17 additions & 11 deletions
@@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 1.0, 0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
@@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 1.0, 0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -97,7 +100,8 @@ define float @test_fmed3_global_nnan(float %a) #3 {
 ; GFX10-LABEL: test_fmed3_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_fmed3_global_nnan:
@@ -107,7 +111,9 @@ define float @test_fmed3_global_nnan(float %a) #3 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
@@ -134,7 +140,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 1.0, 0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -172,7 +180,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
@@ -182,7 +191,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
 
@@ -51,7 +51,8 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
 ; GFX10-LABEL: test_min_K1max_ValK0_f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    v_mul_f16_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f16 v0, v0, 0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_min_K1max_ValK0_f16:
@@ -61,7 +62,9 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f16_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f16 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul half %a, 2.0
   %maxnum = call half @llvm.maxnum.f16(half %fmul, half 0.0)
@@ -95,7 +98,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
 ; GFX10-LABEL: test_min_max_splat_padded_with_undef:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, 0
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +110,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX12-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, 0
+; GFX12-NEXT:    v_pk_min_num_f16 v0, v0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
   %maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half poison>, <2 x half> %fmul)
@@ -231,7 +239,9 @@ define float @test_max_min_global_nnan(float %a) #3 {
 ; GFX10-LABEL: test_max_min_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e64 v0, v0, v0 clamp
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_global_nnan:
@@ -241,7 +251,9 @@ define float @test_max_min_global_nnan(float %a) #3 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_max_num_f32_e64 v0, v0, v0 clamp
+; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 1.0, 0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %minnum = call float @llvm.minnum.f32(float %a, float 1.0)
   %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
@@ -305,9 +317,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -317,7 +327,9 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
@@ -341,7 +353,9 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
@@ -381,9 +395,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 
@@ -236,12 +236,14 @@ define float @test_min_max_global_nnan(float %a) #2 {
 ; GFX10-LABEL: test_min_max_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_min_max_global_nnan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -252,6 +254,8 @@ define float @test_min_max_global_nnan(float %a) #2 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
@@ -263,13 +267,17 @@ define float @test_max_min_global_nnan(float %a) #2 {
 ; GFX10-LABEL: test_max_min_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_max_min_global_nnan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
+; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_global_nnan:
@@ -279,7 +287,9 @@ define float @test_max_min_global_nnan(float %a) #2 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
@@ -456,15 +466,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -489,15 +497,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false: