Skip to content

Commit 8893ea2

Browse files
committed
Allow OPSEL for GFX11 and GFX12.
Section 7.5.1 of the RDNA3 ISA document apparently indicates that OPSEL is allowed (although ignored) in the dot instructions in question (v_dot4_u32_u8, v_dot8_u32_u4, and the dot-iu instructions). Therefore, we should allow it for GFX11 and GFX12.
1 parent 521263b commit 8893ea2

File tree

3 files changed

+14
-442
lines changed

3 files changed

+14
-442
lines changed

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
405405

406406
let OtherPredicates = [HasDot7Insts] in {
407407
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
408+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
409+
// Pre-GFX11, v_dot4_u32_u8 does not allow op_sel.
410+
defm V_DOT4_U32_U8_PREGFX11 : VOP3PInst<"v_dot4_u32_u8",
408411
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
412+
409413
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
414+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
415+
// Pre-GFX11, v_dot8_u32_u4 does not allow op_sel.
416+
defm V_DOT8_U32_U4_PREGFX11 : VOP3PInst<"v_dot8_u32_u4",
410417
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
411418
} // End OtherPredicates = [HasDot7Insts]
412419

@@ -433,7 +440,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
433440

434441
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
435442
let IsDOT = 1 in
436-
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>,
443+
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
437444
null_frag, 1>;
438445
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
439446
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.
@@ -2097,8 +2104,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
20972104
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
20982105

20992106
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
2100-
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
2101-
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
2107+
defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_vi <0x29>;
2108+
defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_vi <0x2b>;
21022109

21032110
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
21042111
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
@@ -2256,8 +2263,10 @@ defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
22562263
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
22572264

22582265
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
2259-
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
2260-
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;
2266+
defm V_DOT4_U32_U8 : VOP3P_Real_gfx11_gfx12<0x17>;
2267+
defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_gfx10<0x17>;
2268+
defm V_DOT8_U32_U4 : VOP3P_Real_gfx11_gfx12<0x19>;
2269+
defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_gfx10<0x19>;
22612270

22622271
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
22632272
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;

llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s

Lines changed: 0 additions & 219 deletions
This file was deleted.

0 commit comments

Comments
 (0)