Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1878,6 +1878,8 @@ defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>;
defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>;
defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>;

defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX12Gen, 0x1a>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is VOP3P_Realtriple_gfx11_gfx12 already.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is it? I couldn't find it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see. Take it from downstream, added 2 weeks ago by Mirko.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Included in latest commit.


//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//
Expand All @@ -1887,7 +1889,7 @@ multiclass VOP3P_Real_gfx11_gfx12<bits<8> op> :

defm V_DOT4_I32_IU8 : VOP3P_Real_gfx11_gfx12<0x16>;
defm V_DOT8_I32_IU4 : VOP3P_Real_gfx11_gfx12<0x18>;
-defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11_gfx12<0x1a>;
+defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX11Gen, 0x1a>;

let AssemblerPredicate = isGFX11Plus in {
def : AMDGPUMnemonicAlias<"v_dot4_i32_i8", "v_dot4_i32_iu8">;
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,1,0] neg_hi:[0,1,0] clamp
// GFX11: [0xff,0xc2,0x1a,0xcc,0xff,0xd6,0xc0,0x5b,0x0b,0xfe,0x00,0x00]

v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should go into gfx11_asm_vop3p_dpp16.s.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]

v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]

v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]

v_dot2_f32_f16 v5, v1, v2, s3
// GFX11: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]

Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,0,0] neg_hi:[0,0,0] clamp
// GFX12: [0xff,0xc0,0x1a,0xcc,0xff,0xd6,0xc0,0x1b,0x0b,0xfe,0x00,0x00]

v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should go into gfx12_asm_vop3p_dpp16.s.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

// GFX12: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]

v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX12: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]

v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]

v_dot2_f32_f16 v5, v1, v2, s3
// GFX12: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]

Expand Down
Loading