-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU] Add gfx1250 V_ADD_{MIN|MAX}_{U|I}32 instructions #151379
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Add gfx1250 V_ADD_{MIN|MAX}_{U|I}32 instructions #151379
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-mc Author: Stanislav Mekhanoshin (rampitec) ChangesPatch is 40.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151379.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 071c9406a1517..8a0c4ac6ed8d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2576,6 +2576,10 @@ def HasFmaakFmamkF64Insts :
Predicate<"Subtarget->hasFmaakFmamkF64Insts()">,
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+def HasAddMinMaxInsts :
+ Predicate<"Subtarget->hasAddMinMaxInsts()">,
+ AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+
def HasPkAddMinMaxInsts :
Predicate<"Subtarget->hasPkAddMinMaxInsts()">,
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fba1c5a0f05e1..bdd900d748531 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1535,6 +1535,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
bool hasIntMinMax64() const { return GFX1250Insts; }
+ // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
+ bool hasAddMinMaxInsts() const { return GFX1250Insts; }
+
// \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5586dd872fef5..ad7057b657036 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -746,6 +746,13 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
+let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
+ defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
+}
+
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
@@ -885,6 +892,13 @@ def : GCNPat<
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
>;
+let SubtargetPredicate = HasAddMinMaxInsts in {
+def : ThreeOp_i32_Pats<add, smax, V_ADD_MAX_I32_e64>;
+def : ThreeOp_i32_Pats<add, umax, V_ADD_MAX_U32_e64>;
+def : ThreeOp_i32_Pats<add, smin, V_ADD_MIN_I32_e64>;
+def : ThreeOp_i32_Pats<add, umin, V_ADD_MIN_U32_e64>;
+}
+
def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
@@ -1821,6 +1835,10 @@ defm V_MIN_U64 : VOP3Only_Realtriple_gfx1250<0x318>;
defm V_MAX_U64 : VOP3Only_Realtriple_gfx1250<0x319>;
defm V_MIN_I64 : VOP3Only_Realtriple_gfx1250<0x31a>;
defm V_MAX_I64 : VOP3Only_Realtriple_gfx1250<0x31b>;
+defm V_ADD_MAX_I32 : VOP3Only_Realtriple_gfx1250<0x25e>;
+defm V_ADD_MAX_U32 : VOP3Only_Realtriple_gfx1250<0x25f>;
+defm V_ADD_MIN_I32 : VOP3Only_Realtriple_gfx1250<0x260>;
+defm V_ADD_MIN_U32 : VOP3Only_Realtriple_gfx1250<0x261>;
defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x369, "v_cvt_pk_fp8_f32">;
defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x36a, "v_cvt_pk_bf8_f32">;
diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll
index b9925060aaec4..00c66563572ea 100644
--- a/llvm/test/CodeGen/AMDGPU/add-max.ll
+++ b/llvm/test/CodeGen/AMDGPU/add-max.ll
@@ -5,9 +5,7 @@
define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_u32_e32 v0, v0, v2
+; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -18,9 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_svv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_u32_e32 v0, v0, v1
+; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -29,12 +25,17 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
}
define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
-; GCN-LABEL: add_max_u32_ssv:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_add_co_i32 s0, s0, s1
-; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GCN-NEXT: v_max_u32_e32 v0, s0, v0
-; GCN-NEXT: ; return to shader part epilog
+; SDAG-LABEL: add_max_u32_ssv:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: add_max_u32_ssv:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_add_co_i32 s0, s0, s1
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
+; GISEL-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
@@ -58,9 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c
define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
; GCN-LABEL: add_max_u32_vsi:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_u32_e32 v0, 4, v0
+; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 4)
@@ -71,9 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
; GCN-LABEL: add_max_u32_svl:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_u32_e32 v0, 0x64, v0
+; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 100)
@@ -82,12 +79,17 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
}
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
-; GCN-LABEL: add_max_u32_slv:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_addk_co_i32 s0, 0x64
-; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GCN-NEXT: v_max_u32_e32 v0, s0, v0
-; GCN-NEXT: ; return to shader part epilog
+; SDAG-LABEL: add_max_u32_slv:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: add_max_u32_slv:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_addk_co_i32 s0, 0x64
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
+; GISEL-NEXT: ; return to shader part epilog
%add = add i32 %a, 100
%max = call i32 @llvm.umax.i32(i32 %add, i32 %b)
%ret = bitcast i32 %max to float
@@ -97,9 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_i32_e32 v0, v0, v2
+; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
@@ -110,9 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_min_u32_e32 v0, v0, v2
+; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
@@ -123,9 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_min_i32_e32 v0, v0, v2
+; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smin.i32(i32 %add, i32 %c)
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index 04c55eedc18d4..a2508057b16af 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -217,6 +217,66 @@ v_mad_nc_i64_i32 v[2:3], v4, v7, 12345
v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
+v_add_min_i32 v2, s4, v7, v8
+// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_i32 v2, v4, 0, 1
+// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_i32 v2, v4, 3, s2
+// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_i32 v2, s4, 4, v2
+// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_i32 v2, v4, v7, 12345
+// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v2, s4, v7, v8
+// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_i32 v2, v4, 0, 1
+// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_i32 v2, v4, 3, s2
+// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_i32 v2, s4, 4, v2
+// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_i32 v2, v4, v7, 12345
+// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v2, s4, v7, v8
+// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_u32 v2, v4, 0, 1
+// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_u32 v2, v4, 3, s2
+// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_u32 v2, s4, 4, v2
+// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_u32 v2, v4, v7, 12345
+// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v2, s4, v7, v8
+// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_u32 v2, v4, 0, 1
+// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_u32 v2, v4, 3, s2
+// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_u32 v2, s4, 4, v2
+// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_u32 v2, v4, v7, 12345
+// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index ebfeb3f74d752..52a18cd82010c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -217,6 +217,66 @@ v_mad_nc_i64_i32 v[2:3], v4, v7, 12345
v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
+v_add_min_i32 v2, s4, v7, v8
+// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_i32 v2, v4, 0, 1
+// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_i32 v2, v4, 3, s2
+// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_i32 v2, s4, 4, v2
+// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_i32 v2, v4, v7, 12345
+// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v2, s4, v7, v8
+// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_i32 v2, v4, 0, 1
+// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_i32 v2, v4, 3, s2
+// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_i32 v2, s4, 4, v2
+// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_i32 v2, v4, v7, 12345
+// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v2, s4, v7, v8
+// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_u32 v2, v4, 0, 1
+// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_u32 v2, v4, 3, s2
+// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_u32 v2, s4, 4, v2
+// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_u32 v2, v4, v7, 12345
+// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v2, s4, v7, v8
+// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_u32 v2, v4, 0, 1
+// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_u32 v2, v4, 3, s2
+// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_u32 v2, s4, 4, v2
+// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_u32 v2, v4, v7, 12345
+// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
index d9c7645543db2..934186fb4fde4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
@@ -146,6 +146,54 @@ v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:102 op_sel:[1,1,1,1] quad_perm:[0,1,2
// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x66 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_add_min_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]...
[truncated]
|
|
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesPatch is 40.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151379.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 071c9406a1517..8a0c4ac6ed8d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2576,6 +2576,10 @@ def HasFmaakFmamkF64Insts :
Predicate<"Subtarget->hasFmaakFmamkF64Insts()">,
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+def HasAddMinMaxInsts :
+ Predicate<"Subtarget->hasAddMinMaxInsts()">,
+ AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+
def HasPkAddMinMaxInsts :
Predicate<"Subtarget->hasPkAddMinMaxInsts()">,
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fba1c5a0f05e1..bdd900d748531 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1535,6 +1535,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
bool hasIntMinMax64() const { return GFX1250Insts; }
+ // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
+ bool hasAddMinMaxInsts() const { return GFX1250Insts; }
+
// \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5586dd872fef5..ad7057b657036 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -746,6 +746,13 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
+let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
+ defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
+}
+
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
@@ -885,6 +892,13 @@ def : GCNPat<
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
>;
+let SubtargetPredicate = HasAddMinMaxInsts in {
+def : ThreeOp_i32_Pats<add, smax, V_ADD_MAX_I32_e64>;
+def : ThreeOp_i32_Pats<add, umax, V_ADD_MAX_U32_e64>;
+def : ThreeOp_i32_Pats<add, smin, V_ADD_MIN_I32_e64>;
+def : ThreeOp_i32_Pats<add, umin, V_ADD_MIN_U32_e64>;
+}
+
def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
@@ -1821,6 +1835,10 @@ defm V_MIN_U64 : VOP3Only_Realtriple_gfx1250<0x318>;
defm V_MAX_U64 : VOP3Only_Realtriple_gfx1250<0x319>;
defm V_MIN_I64 : VOP3Only_Realtriple_gfx1250<0x31a>;
defm V_MAX_I64 : VOP3Only_Realtriple_gfx1250<0x31b>;
+defm V_ADD_MAX_I32 : VOP3Only_Realtriple_gfx1250<0x25e>;
+defm V_ADD_MAX_U32 : VOP3Only_Realtriple_gfx1250<0x25f>;
+defm V_ADD_MIN_I32 : VOP3Only_Realtriple_gfx1250<0x260>;
+defm V_ADD_MIN_U32 : VOP3Only_Realtriple_gfx1250<0x261>;
defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x369, "v_cvt_pk_fp8_f32">;
defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x36a, "v_cvt_pk_bf8_f32">;
diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll
index b9925060aaec4..00c66563572ea 100644
--- a/llvm/test/CodeGen/AMDGPU/add-max.ll
+++ b/llvm/test/CodeGen/AMDGPU/add-max.ll
@@ -5,9 +5,7 @@
define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_u32_e32 v0, v0, v2
+; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -18,9 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_svv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_u32_e32 v0, v0, v1
+; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -29,12 +25,17 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
}
define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
-; GCN-LABEL: add_max_u32_ssv:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_add_co_i32 s0, s0, s1
-; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GCN-NEXT: v_max_u32_e32 v0, s0, v0
-; GCN-NEXT: ; return to shader part epilog
+; SDAG-LABEL: add_max_u32_ssv:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: add_max_u32_ssv:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_add_co_i32 s0, s0, s1
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
+; GISEL-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
@@ -58,9 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c
define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
; GCN-LABEL: add_max_u32_vsi:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_u32_e32 v0, 4, v0
+; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 4)
@@ -71,9 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
; GCN-LABEL: add_max_u32_svl:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_u32_e32 v0, 0x64, v0
+; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 100)
@@ -82,12 +79,17 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
}
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
-; GCN-LABEL: add_max_u32_slv:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_addk_co_i32 s0, 0x64
-; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GCN-NEXT: v_max_u32_e32 v0, s0, v0
-; GCN-NEXT: ; return to shader part epilog
+; SDAG-LABEL: add_max_u32_slv:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: add_max_u32_slv:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_addk_co_i32 s0, 0x64
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
+; GISEL-NEXT: ; return to shader part epilog
%add = add i32 %a, 100
%max = call i32 @llvm.umax.i32(i32 %add, i32 %b)
%ret = bitcast i32 %max to float
@@ -97,9 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_max_i32_e32 v0, v0, v2
+; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
@@ -110,9 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_min_u32_e32 v0, v0, v2
+; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
@@ -123,9 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT: v_min_i32_e32 v0, v0, v2
+; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smin.i32(i32 %add, i32 %c)
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index 04c55eedc18d4..a2508057b16af 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -217,6 +217,66 @@ v_mad_nc_i64_i32 v[2:3], v4, v7, 12345
v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
+v_add_min_i32 v2, s4, v7, v8
+// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_i32 v2, v4, 0, 1
+// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_i32 v2, v4, 3, s2
+// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_i32 v2, s4, 4, v2
+// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_i32 v2, v4, v7, 12345
+// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v2, s4, v7, v8
+// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_i32 v2, v4, 0, 1
+// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_i32 v2, v4, 3, s2
+// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_i32 v2, s4, 4, v2
+// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_i32 v2, v4, v7, 12345
+// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v2, s4, v7, v8
+// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_u32 v2, v4, 0, 1
+// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_u32 v2, v4, 3, s2
+// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_u32 v2, s4, 4, v2
+// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_u32 v2, v4, v7, 12345
+// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v2, s4, v7, v8
+// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_u32 v2, v4, 0, 1
+// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_u32 v2, v4, 3, s2
+// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_u32 v2, s4, 4, v2
+// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_u32 v2, v4, v7, 12345
+// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index ebfeb3f74d752..52a18cd82010c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -217,6 +217,66 @@ v_mad_nc_i64_i32 v[2:3], v4, v7, 12345
v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
+v_add_min_i32 v2, s4, v7, v8
+// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_i32 v2, v4, 0, 1
+// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_i32 v2, v4, 3, s2
+// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_i32 v2, s4, 4, v2
+// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_i32 v2, v4, v7, 12345
+// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v2, s4, v7, v8
+// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_i32 v2, v4, 0, 1
+// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_i32 v2, v4, 3, s2
+// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_i32 v2, s4, 4, v2
+// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_i32 v2, v4, v7, 12345
+// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v2, s4, v7, v8
+// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_u32 v2, v4, 0, 1
+// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_u32 v2, v4, 3, s2
+// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_u32 v2, s4, 4, v2
+// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_u32 v2, v4, v7, 12345
+// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v2, s4, v7, v8
+// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_u32 v2, v4, 0, 1
+// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_u32 v2, v4, 3, s2
+// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_u32 v2, s4, 4, v2
+// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_u32 v2, v4, v7, 12345
+// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
index d9c7645543db2..934186fb4fde4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
@@ -146,6 +146,54 @@ v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:102 op_sel:[1,1,1,1] quad_perm:[0,1,2
// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x66 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_add_min_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]...
[truncated]
|

No description provided.