@@ -2218,7 +2218,7 @@ class VOP3PX2e <bits<8> op, bits<8> LdScaleOp, VOP3PWMMA_Profile P> : Enc128, VO
22182218 let Inst{23-16} = LdScaleOp;
22192219 let Inst{40-32} = scale_src0;
22202220 let Inst{49-41} = scale_src1;
2221- let Inst{58-50} = 0 ; // scale src2
2221+ let Inst{58-50} = 0x100 ; // scale src2 = vgpr0 (dummy)
22222222 let Inst{59} = matrix_b_scale{0}; // scale_op_sel_hi(0)
22232223 let Inst{60} = 0; // scale_op_sel_hi(1)
22242224 let Inst{63-61} = {0, matrix_a_scale_fmt{1-0}}; // neg (lo)
@@ -2433,6 +2433,15 @@ multiclass VOP3P_Real_with_name_gfx12<bits<8> op,
24332433 string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> :
24342434 VOP3P_Real_with_name<GFX12Gen, op, backing_ps_name, asmName>;
24352435
2436+ multiclass VOP3P_Real_LD_SCALE_gfx1250<bits<8> op> { // Real GFX1250 encoding for the WMMA LD_SCALE pseudos; `op` is the 8-bit value forwarded to VOP3Pe_gfx11_gfx12.
2437+ defvar ps = !cast<VOP3P_Pseudo>(NAME); // Look up the VOP3P_Pseudo record that has the same name as the instantiating defm.
2438+ def _gfx1250 : // Defines the record whose name is the defm name with the "_gfx1250" suffix.
2439+ VOP3P_Real_Gen<ps, GFX1250Gen, ps.Mnemonic>, // Real instruction tied to the pseudo, for GFX1250Gen, reusing the pseudo's mnemonic.
2440+ VOP3Pe_gfx11_gfx12<op, ps.Pfl> { // Encoding class, parameterized by `op` and the pseudo's profile (ps.Pfl).
2441+ let Inst{58-50} = 0x100; // scale src2 = vgpr0 (dummy). NOTE(review): presumably overrides the src2 field that VOP3Pe_gfx11_gfx12 places in bits 58-50, with 0x100 being the 9-bit operand code for v0 -- confirm against the encoding class.
2442+ }
2443+ }
2444+
24362445defm V_PK_MIN_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1b, "V_PK_MIN_F16", "v_pk_min_num_f16">; // op 0x1b; "V_PK_MIN_F16" is forwarded as backing_ps_name, "v_pk_min_num_f16" as asmName.
24372446defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_max_num_f16">; // op 0x1c; "V_PK_MAX_F16" is forwarded as backing_ps_name, "v_pk_max_num_f16" as asmName.
24382447
@@ -2462,8 +2471,8 @@ defm V_FMA_MIX_F32_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3d>;
24622471defm V_FMA_MIXLO_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3e>; // Instantiated for GFX1250Gen with 0x3e (presumably the opcode -- confirm against VOP3P_Realtriple).
24632472defm V_FMA_MIXHI_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3f>; // Instantiated for GFX1250Gen with 0x3f (presumably the opcode -- confirm against VOP3P_Realtriple).
24642473
2465- defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3P_Real_gfx1250 <0x35>;
2466- defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3P_Real_gfx1250 <0x3a>;
2474+ defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3P_Real_LD_SCALE_gfx1250 <0x35>; // op 0x35; uses the LD_SCALE-specific multiclass, which fixes Inst{58-50} to 0x100.
2475+ defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3P_Real_LD_SCALE_gfx1250 <0x3a>; // op 0x3a; uses the LD_SCALE-specific multiclass, which fixes Inst{58-50} to 0x100.
24672476
24682477let AssemblerPredicate = isGFX1250Plus in // Sets AssemblerPredicate to isGFX1250Plus for the anonymous alias record below.
24692478def : AMDGPUMnemonicAlias<"v_fma_mix_f32_f16", "v_fma_mix_f32">; // NOTE(review): presumably maps the first mnemonic onto the second, as LLVM's MnemonicAlias<From, To> does -- confirm against AMDGPUMnemonicAlias.
0 commit comments