@@ -1016,7 +1016,11 @@ class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
1016
1016
let HasFP8DstByteSel = 1;
1017
1017
}
1018
1018
1019
- def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
1019
+ class VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<VOPProfile P>
1020
+ : VOP3_Profile<P, VOP3_OPSEL> {
1021
+
1022
+ let Src0RC64 = !if(!gt(P.Src0VT.Size, 32), getVOP3VRegSrcForVT<P.Src0VT>.ret,
1023
+ getVOP3SrcForVT<P.Src0VT>.ret);
1020
1024
let InsVOP3OpSel = (ins PackedF32InputMods: $src0_modifiers, Src0RC64:$src0,
1021
1025
Int32InputMods: $src1_modifiers, Src1RC64:$src1,
1022
1026
FP32InputMods: $src2_modifiers, Src2RC64:$src2,
@@ -1064,6 +1068,11 @@ class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
1064
1068
let HasExt32BitDPP = 0;
1065
1069
let HasExtVOP3DPP = 0;
1066
1070
let HasExt64BitDPP = 0;
1071
+
1072
+ // All convert opcodes operating on FP6/BF6/FP4 data must use VGPR sources for
1073
+ // any operand slots > 32 bit.
1074
+ let Src0RC64 = !if(!gt(P.Src0VT.Size, 32), getVOP3VRegSrcForVT<P.Src0VT>.ret,
1075
+ getVOP3SrcForVT<P.Src0VT>.ret);
1067
1076
}
1068
1077
1069
1078
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -1105,7 +1114,10 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
1105
1114
// The three v_cvt_scalef32_sr_pk_fp4_* definitions in this block share an
// early-clobber constraint on $vdst. In this hunk the F32 variant switches
// from referencing the parameterless profile def to instantiating the profile
// class with VOP_I32_V2F32_I32_F32.
let Constraints = "@earlyclobber $vdst" in {
1106
1115
defm V_CVT_SCALEF32_SR_PK_FP4_F16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
1107
1116
defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
1108
- defm V_CVT_SCALEF32_SR_PK_FP4_F32: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32", VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile>;
1117
+ defm V_CVT_SCALEF32_SR_PK_FP4_F32
1118
+ : VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32",
1119
+ VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<
1120
+ VOP_I32_V2F32_I32_F32>>;
1109
1121
}
1110
1122
}
1111
1123
// Instantiates v_cvt_scalef32_pk_f16_fp4 through VOP3Inst, using
// VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile parameterized on v2f16.
// NOTE(review): the enclosing predicate scope is not visible in this hunk.
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
0 commit comments