-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AMDGPU][True16][MC] disable incorrect VOPC t16 instruction #120271
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][True16][MC] disable incorrect VOPC t16 instruction #120271
Conversation
625a677 to
c4121e3
Compare
c4121e3 to
b80382f
Compare
|
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) ChangesThe current VOPC t16 instructions are not implemented in t16 format but actually fake16 format. Replace them with fake16 first and implement the true16 format one by one in the upcoming patches Full diff: https://github.com/llvm/llvm-project/pull/120271.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3be865f03df1fd..041b9b4d66f63f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1125,8 +1125,9 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
+ // FIXME-TRUE16 use TrueS16Opc when realtrue16 is supported for CMP code
return ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
+ ? ST.useRealTrue16Insts() ? FakeS16Opc : FakeS16Opc
: S16Opc;
if (Size == 32)
return S32Opc;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index bc25d75131cc35..9e65b49c7e94e7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2674,8 +2674,8 @@ let OtherPredicates = [NotHasTrue16BitInsts] in {
} // end OtherPredicates = [NotHasTrue16BitInsts]
let OtherPredicates = [HasTrue16BitInsts] in {
- def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
- def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
} // end OtherPredicates = [HasTrue16BitInsts]
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 9bf043ea334fea..33fd522dd0556d 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1131,16 +1131,16 @@ defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
let OtherPredicates = [HasTrue16BitInsts] in {
-defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_fake16_e64, i16>;
} // End OtherPredicates = [HasTrue16BitInsts]
let OtherPredicates = [NotHasTrue16BitInsts] in {
@@ -1216,21 +1216,21 @@ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
let OtherPredicates = [HasTrue16BitInsts] in {
-defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
-
-defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_fake16_e64, f16>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_fake16_e64, f16>;
} // End OtherPredicates = [HasTrue16BitInsts]
let OtherPredicates = [NotHasTrue16BitInsts] in {
@@ -1707,23 +1707,6 @@ multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
-defm V_CMP_F_F16_t16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
-defm V_CMP_LT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
-defm V_CMP_EQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
-defm V_CMP_LE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
-defm V_CMP_GT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
-defm V_CMP_LG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x005, "v_cmp_lg_f16">;
-defm V_CMP_GE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x006, "v_cmp_ge_f16">;
-defm V_CMP_O_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x007, "v_cmp_o_f16">;
-defm V_CMP_U_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x008, "v_cmp_u_f16">;
-defm V_CMP_NGE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x009, "v_cmp_nge_f16">;
-defm V_CMP_NLG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00a, "v_cmp_nlg_f16">;
-defm V_CMP_NGT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00b, "v_cmp_ngt_f16">;
-defm V_CMP_NLE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00c, "v_cmp_nle_f16">;
-defm V_CMP_NEQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00d, "v_cmp_neq_f16">;
-defm V_CMP_NLT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00e, "v_cmp_nlt_f16">;
-defm V_CMP_T_F16_t16 : VOPC_Real_t16_gfx11<0x00f, "v_cmp_t_f16", "V_CMP_TRU_F16_t16", "v_cmp_tru_f16">;
-
defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
defm V_CMP_LT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
@@ -1759,19 +1742,6 @@ defm V_CMP_NLT_F32 : VOPC_Real_gfx11_gfx12<0x01e>;
defm V_CMP_T_F32 : VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">;
defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">;
-defm V_CMP_LT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
-defm V_CMP_EQ_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
-defm V_CMP_LE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
-defm V_CMP_GT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x034, "v_cmp_gt_i16">;
-defm V_CMP_NE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x035, "v_cmp_ne_i16">;
-defm V_CMP_GE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x036, "v_cmp_ge_i16">;
-defm V_CMP_LT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x039, "v_cmp_lt_u16">;
-defm V_CMP_EQ_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03a, "v_cmp_eq_u16">;
-defm V_CMP_LE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03b, "v_cmp_le_u16">;
-defm V_CMP_GT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03c, "v_cmp_gt_u16">;
-defm V_CMP_NE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03d, "v_cmp_ne_u16">;
-defm V_CMP_GE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03e, "v_cmp_ge_u16">;
-
defm V_CMP_LT_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
defm V_CMP_EQ_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
defm V_CMP_LE_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
@@ -1819,28 +1789,10 @@ defm V_CMP_NE_U64 : VOPC_Real_gfx11_gfx12<0x05d>;
defm V_CMP_GE_U64 : VOPC_Real_gfx11_gfx12<0x05e>;
defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>;
-defm V_CMP_CLASS_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
-defm V_CMPX_F_F16_t16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
-defm V_CMPX_LT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
-defm V_CMPX_EQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
-defm V_CMPX_LE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
-defm V_CMPX_GT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
-defm V_CMPX_LG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
-defm V_CMPX_GE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
-defm V_CMPX_O_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
-defm V_CMPX_U_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
-defm V_CMPX_NGE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
-defm V_CMPX_NLG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
-defm V_CMPX_NGT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
-defm V_CMPX_NLE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
-defm V_CMPX_NEQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
-defm V_CMPX_NLT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
-defm V_CMPX_T_F16_t16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_t16", "v_cmpx_t_f16", "v_cmpx_tru_f16">;
-
defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
defm V_CMPX_LT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
@@ -1892,19 +1844,6 @@ defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11_gfx12<0x0ad>;
defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11_gfx12<0x0ae>;
defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">;
-defm V_CMPX_LT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
-defm V_CMPX_EQ_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
-defm V_CMPX_LE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
-defm V_CMPX_GT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b4, "v_cmpx_gt_i16">;
-defm V_CMPX_NE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b5, "v_cmpx_ne_i16">;
-defm V_CMPX_GE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b6, "v_cmpx_ge_i16">;
-defm V_CMPX_LT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b9, "v_cmpx_lt_u16">;
-defm V_CMPX_EQ_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0ba, "v_cmpx_eq_u16">;
-defm V_CMPX_LE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bb, "v_cmpx_le_u16">;
-defm V_CMPX_GT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bc, "v_cmpx_gt_u16">;
-defm V_CMPX_NE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bd, "v_cmpx_ne_u16">;
-defm V_CMPX_GE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0be, "v_cmpx_ge_u16">;
-
defm V_CMPX_LT_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
defm V_CMPX_EQ_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
defm V_CMPX_LE_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
@@ -1951,7 +1890,6 @@ defm V_CMPX_GT_U64 : VOPCX_Real_gfx11_gfx12<0x0dc>;
defm V_CMPX_NE_U64 : VOPCX_Real_gfx11_gfx12<0x0dd>;
defm V_CMPX_GE_U64 : VOPCX_Real_gfx11_gfx12<0x0de>;
defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>;
-defm V_CMPX_CLASS_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11_gfx12<0x0fe>;
defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11_gfx12<0x0ff>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 55015c6d13d8af..cdb67caea12cf0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -20,8 +20,8 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
@@ -58,8 +58,8 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index 4241f945a87d53..ed811d37c3d0fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -20,8 +20,8 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
@@ -58,8 +58,8 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
|
|
@llvm/pr-subscribers-llvm-globalisel Author: Brox Chen (broxigarchen) ChangesThe current VOPC t16 instructions are not implemented in t16 format but actually fake16 format. Replace them with fake16 first and implement the true16 format one by one in the upcoming patches Full diff: https://github.com/llvm/llvm-project/pull/120271.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3be865f03df1fd..041b9b4d66f63f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1125,8 +1125,9 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
+ // FIXME-TRUE16 use TrueS16Opc when realtrue16 is supported for CMP code
return ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
+ ? ST.useRealTrue16Insts() ? FakeS16Opc : FakeS16Opc
: S16Opc;
if (Size == 32)
return S32Opc;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index bc25d75131cc35..9e65b49c7e94e7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2674,8 +2674,8 @@ let OtherPredicates = [NotHasTrue16BitInsts] in {
} // end OtherPredicates = [NotHasTrue16BitInsts]
let OtherPredicates = [HasTrue16BitInsts] in {
- def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
- def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
} // end OtherPredicates = [HasTrue16BitInsts]
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 9bf043ea334fea..33fd522dd0556d 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1131,16 +1131,16 @@ defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
let OtherPredicates = [HasTrue16BitInsts] in {
-defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_fake16_e64, i16>;
} // End OtherPredicates = [HasTrue16BitInsts]
let OtherPredicates = [NotHasTrue16BitInsts] in {
@@ -1216,21 +1216,21 @@ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
let OtherPredicates = [HasTrue16BitInsts] in {
-defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
-
-defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_fake16_e64, f16>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_fake16_e64, f16>;
} // End OtherPredicates = [HasTrue16BitInsts]
let OtherPredicates = [NotHasTrue16BitInsts] in {
@@ -1707,23 +1707,6 @@ multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
-defm V_CMP_F_F16_t16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
-defm V_CMP_LT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
-defm V_CMP_EQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
-defm V_CMP_LE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
-defm V_CMP_GT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
-defm V_CMP_LG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x005, "v_cmp_lg_f16">;
-defm V_CMP_GE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x006, "v_cmp_ge_f16">;
-defm V_CMP_O_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x007, "v_cmp_o_f16">;
-defm V_CMP_U_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x008, "v_cmp_u_f16">;
-defm V_CMP_NGE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x009, "v_cmp_nge_f16">;
-defm V_CMP_NLG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00a, "v_cmp_nlg_f16">;
-defm V_CMP_NGT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00b, "v_cmp_ngt_f16">;
-defm V_CMP_NLE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00c, "v_cmp_nle_f16">;
-defm V_CMP_NEQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00d, "v_cmp_neq_f16">;
-defm V_CMP_NLT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00e, "v_cmp_nlt_f16">;
-defm V_CMP_T_F16_t16 : VOPC_Real_t16_gfx11<0x00f, "v_cmp_t_f16", "V_CMP_TRU_F16_t16", "v_cmp_tru_f16">;
-
defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
defm V_CMP_LT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
@@ -1759,19 +1742,6 @@ defm V_CMP_NLT_F32 : VOPC_Real_gfx11_gfx12<0x01e>;
defm V_CMP_T_F32 : VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">;
defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">;
-defm V_CMP_LT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
-defm V_CMP_EQ_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
-defm V_CMP_LE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
-defm V_CMP_GT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x034, "v_cmp_gt_i16">;
-defm V_CMP_NE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x035, "v_cmp_ne_i16">;
-defm V_CMP_GE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x036, "v_cmp_ge_i16">;
-defm V_CMP_LT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x039, "v_cmp_lt_u16">;
-defm V_CMP_EQ_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03a, "v_cmp_eq_u16">;
-defm V_CMP_LE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03b, "v_cmp_le_u16">;
-defm V_CMP_GT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03c, "v_cmp_gt_u16">;
-defm V_CMP_NE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03d, "v_cmp_ne_u16">;
-defm V_CMP_GE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03e, "v_cmp_ge_u16">;
-
defm V_CMP_LT_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
defm V_CMP_EQ_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
defm V_CMP_LE_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
@@ -1819,28 +1789,10 @@ defm V_CMP_NE_U64 : VOPC_Real_gfx11_gfx12<0x05d>;
defm V_CMP_GE_U64 : VOPC_Real_gfx11_gfx12<0x05e>;
defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>;
-defm V_CMP_CLASS_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
-defm V_CMPX_F_F16_t16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
-defm V_CMPX_LT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
-defm V_CMPX_EQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
-defm V_CMPX_LE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
-defm V_CMPX_GT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
-defm V_CMPX_LG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
-defm V_CMPX_GE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
-defm V_CMPX_O_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
-defm V_CMPX_U_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
-defm V_CMPX_NGE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
-defm V_CMPX_NLG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
-defm V_CMPX_NGT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
-defm V_CMPX_NLE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
-defm V_CMPX_NEQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
-defm V_CMPX_NLT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
-defm V_CMPX_T_F16_t16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_t16", "v_cmpx_t_f16", "v_cmpx_tru_f16">;
-
defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
defm V_CMPX_LT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
@@ -1892,19 +1844,6 @@ defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11_gfx12<0x0ad>;
defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11_gfx12<0x0ae>;
defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">;
-defm V_CMPX_LT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
-defm V_CMPX_EQ_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
-defm V_CMPX_LE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
-defm V_CMPX_GT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b4, "v_cmpx_gt_i16">;
-defm V_CMPX_NE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b5, "v_cmpx_ne_i16">;
-defm V_CMPX_GE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b6, "v_cmpx_ge_i16">;
-defm V_CMPX_LT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b9, "v_cmpx_lt_u16">;
-defm V_CMPX_EQ_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0ba, "v_cmpx_eq_u16">;
-defm V_CMPX_LE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bb, "v_cmpx_le_u16">;
-defm V_CMPX_GT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bc, "v_cmpx_gt_u16">;
-defm V_CMPX_NE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bd, "v_cmpx_ne_u16">;
-defm V_CMPX_GE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0be, "v_cmpx_ge_u16">;
-
defm V_CMPX_LT_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
defm V_CMPX_EQ_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
defm V_CMPX_LE_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
@@ -1951,7 +1890,6 @@ defm V_CMPX_GT_U64 : VOPCX_Real_gfx11_gfx12<0x0dc>;
defm V_CMPX_NE_U64 : VOPCX_Real_gfx11_gfx12<0x0dd>;
defm V_CMPX_GE_U64 : VOPCX_Real_gfx11_gfx12<0x0de>;
defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>;
-defm V_CMPX_CLASS_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11_gfx12<0x0fe>;
defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11_gfx12<0x0ff>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 55015c6d13d8af..cdb67caea12cf0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -20,8 +20,8 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
@@ -58,8 +58,8 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index 4241f945a87d53..ed811d37c3d0fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -20,8 +20,8 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
@@ -58,8 +58,8 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
|
Sisyph
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo pesudo -> pseudo in commit message
| defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>; | ||
| defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>; | ||
|
|
||
| let OtherPredicates = [HasTrue16BitInsts] in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be True16Predicate = UseFakeTrue16Insts ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated. I was thinking about updating this in the later patch but we can do this here as well
The current VOPC t16 instructions are not implemented with the correct t16 pseudo. Thus the current t16/fake16 instructions are all in fake16 format.
The plan is to remove the incorrect t16 instructions and refactor them. The first step is to remove them in this patch. The next step will be updating the t16/fake16 pseudo to the correct format and add back true16 instruction one by one in the upcoming patches.