Skip to content

Commit af512a6

Browse files
committed
This commit:
(1) created VOP3P_Realtriple_gfx11_gfx12 (2) updated asm tests (3) created disasm tests.
1 parent 1cbabd4 commit af512a6

File tree

11 files changed

+55
-21
lines changed

11 files changed

+55
-21
lines changed

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1856,6 +1856,9 @@ multiclass VOP3P_Realtriple<GFXGen Gen, bits<8> op, string backing_ps_name = NAM
18561856
VOP3P_Real_dpp<Gen, op, backing_ps_name, asmName>,
18571857
VOP3P_Real_dpp8<Gen, op, backing_ps_name, asmName>;
18581858

1859+
multiclass VOP3P_Realtriple_gfx11_gfx12<bits<8> op>
1860+
: VOP3P_Realtriple<GFX11Gen, op>, VOP3P_Realtriple<GFX12Gen, op>;
1861+
18591862
//===----------------------------------------------------------------------===//
18601863
// GFX12
18611864
//===----------------------------------------------------------------------===//
@@ -1878,8 +1881,6 @@ defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>;
18781881
defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>;
18791882
defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>;
18801883

1881-
defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX12Gen, 0x1a>;
1882-
18831884
//===----------------------------------------------------------------------===//
18841885
// GFX11
18851886
//===----------------------------------------------------------------------===//
@@ -1889,7 +1890,7 @@ multiclass VOP3P_Real_gfx11_gfx12<bits<8> op> :
18891890

18901891
defm V_DOT4_I32_IU8 : VOP3P_Real_gfx11_gfx12<0x16>;
18911892
defm V_DOT8_I32_IU4 : VOP3P_Real_gfx11_gfx12<0x18>;
1892-
defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX11Gen, 0x1a>;
1893+
defm V_DOT2_F32_BF16 : VOP3P_Realtriple_gfx11_gfx12<0x1a>;
18931894

18941895
let AssemblerPredicate = isGFX11Plus in {
18951896
def : AMDGPUMnemonicAlias<"v_dot4_i32_i8", "v_dot4_i32_iu8">;

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5390,3 +5390,15 @@ v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 row_xmask:0 row_mask:0x1 bank_ma
53905390

53915391
v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
53925392
// GFX11: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
5393+
5394+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x04,0xff]
5395+
// GFX11: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x04,0xff]
5396+
5397+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe]
5398+
// GFX11: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe]
5399+
5400+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0xff]
5401+
// GFX11: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0xff]
5402+
5403+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0x00]
5404+
// GFX11: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0x00]

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3630,3 +3630,9 @@ v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
36303630

36313631
v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| dpp8:[0,0,0,0,0,0,0,0] fi:0
36323632
// GFX11: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x47,0x67,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
3633+
3634+
v_dot2_f32_bf16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x40,0x1a,0xcc,0xe9,0x04,0x0e,0x18,0x01,0x77,0x39,0x05]
3635+
// GFX11: v_dot2_f32_bf16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x40,0x1a,0xcc,0xe9,0x04,0x0e,0x18,0x01,0x77,0x39,0x05]
3636+
3637+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x45,0x1a,0xcc,0xe9,0x04,0x0e,0xdc,0x01,0x77,0x39,0x05]
3638+
// GFX11: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x45,0x1a,0xcc,0xe9,0x04,0x0e,0xdc,0x01,0x77,0x39,0x05]

llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,6 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
4545
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,1,0] neg_hi:[0,1,0] clamp
4646
// GFX11: [0xff,0xc2,0x1a,0xcc,0xff,0xd6,0xc0,0x5b,0x0b,0xfe,0x00,0x00]
4747

48-
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
49-
// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]
50-
51-
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
52-
// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]
53-
54-
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
55-
// GFX11: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]
56-
5748
v_dot2_f32_f16 v5, v1, v2, s3
5849
// GFX11: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]
5950

llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5639,6 +5639,18 @@ v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h row_mirror
56395639
v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
56405640
// GFX12: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
56415641

5642+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x04,0xff]
5643+
// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x04,0xff]
5644+
5645+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe]
5646+
// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe]
5647+
5648+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0xff]
5649+
// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0xff]
5650+
5651+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0x00]
5652+
// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0x00]
5653+
56425654
v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0]
56435655
// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
56445656

llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3896,6 +3896,12 @@ v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0]
38963896
v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| dpp8:[0,0,0,0,0,0,0,0] fi:0
38973897
// GFX12: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x47,0x67,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
38983898

3899+
v_dot2_f32_bf16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x40,0x1a,0xcc,0xe9,0x04,0x0e,0x18,0x01,0x77,0x39,0x05]
3900+
// GFX12: v_dot2_f32_bf16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x40,0x1a,0xcc,0xe9,0x04,0x0e,0x18,0x01,0x77,0x39,0x05]
3901+
3902+
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x45,0x1a,0xcc,0xe9,0x04,0x0e,0xdc,0x01,0x77,0x39,0x05]
3903+
// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x45,0x1a,0xcc,0xe9,0x04,0x0e,0xdc,0x01,0x77,0x39,0x05]
3904+
38993905
v_minimum_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
39003906
// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x65,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
39013907

llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,6 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
4545
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,0,0] neg_hi:[0,0,0] clamp
4646
// GFX12: [0xff,0xc0,0x1a,0xcc,0xff,0xd6,0xc0,0x1b,0x0b,0xfe,0x00,0x00]
4747

48-
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
49-
// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]
50-
51-
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
52-
// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]
53-
54-
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
55-
// GFX11: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]
56-
5748
v_dot2_f32_f16 v5, v1, v2, s3
5849
// GFX12: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]
5950

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p_dpp16.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
# GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x13,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe]
55
0x00,0x45,0x13,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe
66

7+
# GFX11: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe]
8+
0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe
9+
710
# GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x13,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x0c,0xff]
811
0x00,0x40,0x13,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x0c,0xff
912

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p_dpp8.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
# GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x45,0x13,0xcc,0xe9,0x04,0x0e,0xdc,0x01,0x77,0x39,0x05]
55
0x00,0x45,0x13,0xcc,0xe9,0x04,0x0e,0xdc,0x01,0x77,0x39,0x05
66

7+
# GFX11: v_dot2_f32_bf16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x40,0x1a,0xcc,0xe9,0x04,0x0e,0x18,0x01,0x77,0x39,0x05]
8+
0x05,0x40,0x1a,0xcc,0xe9,0x04,0x0e,0x18,0x01,0x77,0x39,0x05
9+
710
# GFX11: v_fma_mix_f32_e64_dpp v0, v1, v2, v3 clamp dpp8:[2,2,2,2,4,4,4,4] fi:1 ; encoding: [0x00,0x80,0x20,0xcc,0xea,0x04,0x0e,0x04,0x01,0x92,0x44,0x92]
811
0x00,0x80,0x20,0xcc,0xea,0x04,0x0e,0x04,0x01,0x92,0x44,0x92
912

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp16.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77
# GFX12: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x13,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x0c,0xff]
88
0x00,0x40,0x13,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x0c,0xff
99

10+
# GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe]
11+
0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe
12+
13+
# GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x0c,0xff]
14+
0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x0c,0xff
15+
1016
# GFX12: v_fma_mix_f32_e64_dpp v0, v1, v2, v3 row_ror:7 row_mask:0xf bank_mask:0x1 bound_ctrl:1 ; encoding: [0x00,0x00,0x20,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x27,0x09,0xf1]
1117
0x00,0x00,0x20,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x27,0x09,0xf1
1218

0 commit comments

Comments
 (0)