@@ -1016,7 +1016,11 @@ class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
10161016 let HasFP8DstByteSel = 1;
10171017}
10181018
1019- def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
1019+ class VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<VOPProfile P>
1020+ : VOP3_Profile<P, VOP3_OPSEL> {
1021+
1022+ let Src0RC64 = !if(!gt(P.Src0VT.Size, 32), getVOP3VRegSrcForVT<P.Src0VT>.ret,
1023+ getVOP3SrcForVT<P.Src0VT>.ret);
10201024 let InsVOP3OpSel = (ins PackedF32InputMods: $src0_modifiers, Src0RC64:$src0,
10211025 Int32InputMods: $src1_modifiers, Src1RC64:$src1,
10221026 FP32InputMods: $src2_modifiers, Src2RC64:$src2,
@@ -1064,6 +1068,11 @@ class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
10641068 let HasExt32BitDPP = 0;
10651069 let HasExtVOP3DPP = 0;
10661070 let HasExt64BitDPP = 0;
1071+
1072+ // All convert opcodes operating on FP6/BF6/FP4 data must use VGPR sources for
1073+ // any operand slot wider than 32 bits.
1074+ let Src0RC64 = !if(!gt(P.Src0VT.Size, 32), getVOP3VRegSrcForVT<P.Src0VT>.ret,
1075+ getVOP3SrcForVT<P.Src0VT>.ret);
10671076}
10681077
10691078let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -1105,7 +1114,10 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
11051114 let Constraints = "@earlyclobber $vdst" in {
11061115 defm V_CVT_SCALEF32_SR_PK_FP4_F16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
11071116 defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
1108- defm V_CVT_SCALEF32_SR_PK_FP4_F32: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32", VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile>;
1117+ defm V_CVT_SCALEF32_SR_PK_FP4_F32
1118+ : VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32",
1119+ VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<
1120+ VOP_I32_V2F32_I32_F32>>;
11091121 }
11101122 }
11111123 defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
0 commit comments