Skip to content

Commit 1cbabd4

Browse files
committed
[AMDGPU][MC] Allow dpp in v_dot2_f32_bf16 for GFX11 and 12
Allow the DPP variants (DPP16 and DPP8) of v_dot2_f32_bf16 on GFX11 and GFX12.
1 parent 7114cfb commit 1cbabd4

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1878,6 +1878,8 @@ defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>;
18781878
defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>;
18791879
defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>;
18801880

1881+
defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX12Gen, 0x1a>;
1882+
18811883
//===----------------------------------------------------------------------===//
18821884
// GFX11
18831885
//===----------------------------------------------------------------------===//
@@ -1887,7 +1889,7 @@ multiclass VOP3P_Real_gfx11_gfx12<bits<8> op> :
18871889

18881890
defm V_DOT4_I32_IU8 : VOP3P_Real_gfx11_gfx12<0x16>;
18891891
defm V_DOT8_I32_IU4 : VOP3P_Real_gfx11_gfx12<0x18>;
1890-
defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11_gfx12<0x1a>;
1892+
defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX11Gen, 0x1a>;
18911893

18921894
let AssemblerPredicate = isGFX11Plus in {
18931895
def : AMDGPUMnemonicAlias<"v_dot4_i32_i8", "v_dot4_i32_iu8">;

llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
4545
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,1,0] neg_hi:[0,1,0] clamp
4646
// GFX11: [0xff,0xc2,0x1a,0xcc,0xff,0xd6,0xc0,0x5b,0x0b,0xfe,0x00,0x00]
4747

48+
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
49+
// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]
50+
51+
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
52+
// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]
53+
54+
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
55+
// GFX11: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]
56+
4857
v_dot2_f32_f16 v5, v1, v2, s3
4958
// GFX11: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]
5059

llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
4545
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,0,0] neg_hi:[0,0,0] clamp
4646
// GFX12: [0xff,0xc0,0x1a,0xcc,0xff,0xd6,0xc0,0x1b,0x0b,0xfe,0x00,0x00]
4747

48+
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
49+
// GFX12: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]
50+
51+
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
52+
// GFX12: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]
53+
54+
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
55+
// GFX12: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]
56+
4857
v_dot2_f32_f16 v5, v1, v2, s3
4958
// GFX12: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]
5059

0 commit comments

Comments
 (0)