@@ -405,8 +405,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
405405
406406let OtherPredicates = [HasDot7Insts] in {
407407defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
408+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
409+ // v_dot4_u32_u8 does not allow op_sel Pre-GFX11
410+ defm V_DOT4_U32_U8_PREGFX11 : VOP3PInst<"v_dot4_u32_u8",
408411 VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
412+
409413defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
414+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
415+ // v_dot8_u32_u4 does not allow op_sel Pre-GFX11
416+ defm V_DOT8_U32_U4_PREGFX11 : VOP3PInst<"v_dot8_u32_u4",
410417 VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
411418} // End OtherPredicates = [HasDot7Insts]
412419
@@ -433,7 +440,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
433440
434441multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
435442 let IsDOT = 1 in
436- defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL >,
443+ defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED >,
437444 null_frag, 1>;
438445 // Dot-iu instructions consider input as signed if imod neg bits are set. Thus
439446 // Dot-iu Intrinsics have extra operands and require separate codegen pattern.
@@ -2097,8 +2104,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
20972104defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
20982105
20992106defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
2100- defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
2101- defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
2107+ defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_vi <0x29>;
2108+ defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_vi <0x2b>;
21022109
21032110defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
21042111defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
@@ -2256,8 +2263,10 @@ defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
22562263defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
22572264
22582265defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
2259- defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
2260- defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;
2266+ defm V_DOT4_U32_U8 : VOP3P_Real_gfx11_gfx12<0x17>;
2267+ defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_gfx10<0x17>;
2268+ defm V_DOT8_U32_U4 : VOP3P_Real_gfx11_gfx12<0x19>;
2269+ defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_gfx10<0x19>;
22612270
22622271defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
22632272defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;
0 commit comments