@@ -383,8 +383,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
383383
384384let OtherPredicates = [HasDot7Insts] in {
385385defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
386+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
387+ // v_dot4_u32_u8 does not allow op_sel before GFX11, so the pre-GFX11 variant below uses the VOP3_PACKED_NO_OPSEL profile.
388+ defm V_DOT4_U32_U8_PREGFX11 : VOP3PInst<"v_dot4_u32_u8",
386389 VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
390+
387391defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
392+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
393+ // v_dot8_u32_u4 does not allow op_sel before GFX11, so the pre-GFX11 variant below uses the VOP3_PACKED_NO_OPSEL profile.
394+ defm V_DOT8_U32_U4_PREGFX11 : VOP3PInst<"v_dot8_u32_u4",
388395 VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
389396} // End OtherPredicates = [HasDot7Insts]
390397
@@ -411,7 +418,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
411418
412419multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
413420 let IsDOT = 1 in
414- defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL >,
421+ defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED >,
415422 null_frag, 1>;
416423 // Dot-iu instructions consider input as signed if imod neg bits are set. Thus
417424 // Dot-iu Intrinsics have extra operands and require separate codegen pattern.
@@ -1712,8 +1719,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
17121719defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
17131720
17141721defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
1715- defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
1716- defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
1722+ defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_vi <0x29>;
1723+ defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_vi <0x2b>;
17171724
17181725defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
17191726defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
@@ -1845,8 +1852,10 @@ defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
18451852defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
18461853
18471854defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
1848- defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
1849- defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;
1855+ defm V_DOT4_U32_U8 : VOP3P_Real_gfx11_gfx12<0x17>;
1856+ defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_gfx10<0x17>;
1857+ defm V_DOT8_U32_U4 : VOP3P_Real_gfx11_gfx12<0x19>;
1858+ defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_gfx10<0x19>;
18501859
18511860defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
18521861defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;
0 commit comments