@@ -569,16 +569,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
569569 getAsmVOP3OpSel<3, HasClamp, HasOMod,
570570 HasSrc0FloatMods, HasSrc1FloatMods,
571571 HasSrc2FloatMods>.ret);
572- let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
573- getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
574- HasOMod, 0, 1, HasSrc0FloatMods,
575- HasSrc1FloatMods,
576- HasSrc2FloatMods>.ret>.ret);
577- let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
578- getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
579- HasOMod, 0, 1, HasSrc0FloatMods,
580- HasSrc1FloatMods,
581- HasSrc2FloatMods>.ret>.ret);
572+ let AsmVOP3Base = !subst(", $src2_modifiers", "",
573+ getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
574+ HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
575+ HasModifiers, DstVT>.ret);
582576}
583577
584578class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
@@ -636,8 +630,8 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
636630 defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
637631} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
638632
639- defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile< VOP_I16_I16_I16, VOP3_OPSEL> >;
640- defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile< VOP_I16_I16_I16, VOP3_OPSEL> >;
633+ defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
634+ defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
641635
642636defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
643637defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
@@ -752,6 +746,8 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
752746 (i32 (EXTRACT_SUBREG $src1, sub1)),
753747 (i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
754748
749+ } // End SubtargetPredicate = isGFX9Plus
750+
755751// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
756752class OpSelBinOpClampPat<SDPatternOperator node,
757753 Instruction inst> : GCNPat<
@@ -760,9 +756,14 @@ class OpSelBinOpClampPat<SDPatternOperator node,
760756 (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
761757>;
762758
763- def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
764- def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
765- } // End SubtargetPredicate = isGFX9Plus
// Selection patterns mapping saddsat/ssubsat onto V_ADD_I16_e64/V_SUB_I16_e64
// via OpSelBinOpClampPat, whose output pattern passes DSTCLAMP.ENABLE.
// Both predicates below apply to every pattern in the block.
759+ let SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts in {
760+ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
761+ def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
762+ } // End SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts
// Same saddsat/ssubsat patterns, but selecting the _fake16_e64 pseudos;
// applied under True16Predicate = UseFakeTrue16Insts.
// NOTE(review): no SubtargetPredicate is set on this block, unlike the
// NotHasTrue16BitInsts block for the plain _e64 pseudos — confirm intended.
763+ let True16Predicate = UseFakeTrue16Insts in {
764+ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
765+ def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
766+ } // End True16Predicate = UseFakeTrue16Insts
766767
767768multiclass IMAD32_Pats <VOP3_Pseudo inst> {
768769 def : GCNPat <
@@ -871,21 +872,31 @@ let SubtargetPredicate = isGFX10Plus in {
871872 def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
872873 }
873874
874- defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
875- defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
876-
877- def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
878- def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
879-
880- // Undo sub x, c -> add x, -c canonicalization since c is more likely
881- // an inline immediate than -c.
882- def : GCNPat<
883- (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
884- (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
885- >;
875+ defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
876+ defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;
886877
887878} // End SubtargetPredicate = isGFX10Plus
888879
// Patterns targeting the plain V_ADD_NC_U16_e64/V_SUB_NC_U16_e64 pseudos:
// uaddsat/usubsat through OpSelBinOpClampPat (output sets DSTCLAMP.ENABLE),
// plus a rewrite of (add x, NegSubInlineIntConst16) into V_SUB_NC_U16_e64.
880+ let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in {
881+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
882+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
883+ // Undo sub x, c -> add x, -c canonicalization since c is more likely
884+ // an inline immediate than -c.
885+ def : GCNPat<
886+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
887+ (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
888+ >;
889+ } // End True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus
890+
// Same three patterns as for the plain _e64 pseudos, but selecting
// V_ADD_NC_U16_fake16_e64/V_SUB_NC_U16_fake16_e64 under UseFakeTrue16Insts.
// The GCNPat rewrites (add x, NegSubInlineIntConst16) into a subtract.
// NOTE(review): no SubtargetPredicate is set on this block, unlike the
// NotHasTrue16BitInsts block (isGFX10Plus) — confirm intended.
891+ let True16Predicate = UseFakeTrue16Insts in {
892+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
893+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_fake16_e64>;
894+ def : GCNPat<
895+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
896+ (V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
897+ >;
898+ } // End True16Predicate = UseFakeTrue16Insts
899+
889900let SubtargetPredicate = isGFX12Plus in {
890901 let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
891902 defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
@@ -1104,6 +1115,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
11041115multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
11051116 VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
11061117
// Instantiates VOP3_Realtriple_with_name twice, once for GFX11Gen and once
// for GFX12Gen, forwarding the same op, opName, asmName, pseudo_mnemonic and
// isSingle to both. Note the argument order here is (op, asmName, opName),
// while the inherited multiclass is passed (op, opName, asmName).
// Defaults: opName = NAME, pseudo_mnemonic = "", isSingle = 0.
1118+ multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1119+ string pseudo_mnemonic = "", bit isSingle = 0> :
1120+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
1121+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
1122+
// Emits two VOP3_Realtriple_t16_gfx11_gfx12 instantiations that share one
// opcode and one asmName: the first uses opName with "_t16" pasted on, the
// second uses opName with "_fake16" pasted on. Because opName is pasted
// directly, it must not contain trailing whitespace.
// Defaults: opName = NAME, pseudo_mnemonic = "", isSingle = 0.
1123+ multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1124+ string pseudo_mnemonic = "", bit isSingle = 0> {
1125+ defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1126+ defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1127+ }
1128+
11071129multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
11081130 VOP3be_Real<GFX11Gen, op, opName, asmName>,
11091131 VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1189,17 +1211,17 @@ defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "
11891211defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
11901212defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
11911213defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
1192- defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12 <0x303>;
1193- defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12 <0x304>;
1214+ defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12 <0x303, "v_add_nc_u16" >;
1215+ defm V_SUB_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12 <0x304, "v_sub_nc_u16" >;
11941216defm V_MUL_LO_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
11951217defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
11961218defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
11971219defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x309, "v_max_u16">;
11981220defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
11991221defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
12001222defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
1201- defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12 <0x30d, "V_ADD_I16 ", "v_add_nc_i16 ">;
1202- defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12 <0x30e, "V_SUB_I16 ", "v_sub_nc_i16 ">;
// Real encodings 0x30d/0x30e: asm mnemonic v_{add,sub}_nc_i16, backed by the
// V_{ADD,SUB}_I16 pseudos. The multiclass pastes "_t16"/"_fake16" directly
// onto opName, so the strings must not carry trailing whitespace.
1223+ defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x30d, "v_add_nc_i16", "V_ADD_I16">;
1224+ defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x30e, "v_sub_nc_i16", "V_SUB_I16">;
12031225defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
12041226defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
12051227defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
0 commit comments