[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16 #118984

jayfoad · 2024-12-06T15:13:15Z

There is still no codegen support because the corresponding
v_dot2c_f32_bf16 instruction is not supported on GFX11.

There is still no codegen support because there is no corresponding v_dot2acc_f32_bf16 instruction.

llvmbot · 2024-12-06T15:14:01Z

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

There is still no codegen support because there is no corresponding
v_dot2c_f32_bf16 instruction.

Patch is 22.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118984.diff

5 Files Affected:

(modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+1-1)
(modified) llvm/lib/Target/AMDGPU/VOPDInstructions.td (+7-11)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vopd.s (+216)
(modified) llvm/test/MC/AMDGPU/gfx11_unsupported.s (+3)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt (+108)

diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 128c7756191181..17fc1493ffdf97 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1190,7 +1190,7 @@ let Constraints = "$vdst = $src2",
     defm V_DOT8C_I32_I4  : VOP2Inst<"v_dot8c_i32_i4",  VOP_DOT_ACC_I32_I32>;
 
   let SubtargetPredicate = HasDot13Insts in
-    defm V_DOT2C_F32_BF16 : VOP2Inst<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16>;
+    defm V_DOT2C_F32_BF16 : VOP2Inst_VOPD<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16, 0xd, "v_dot2acc_f32_bf16">;
 }
 
 let AddedComplexity = 30 in {
diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
index c6af3d67c56007..b89cd8b914f8b9 100644
--- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -87,12 +87,12 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
   let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
   let mayRaiseFPException = ReadsModeReg;
 
-  // V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for
-  // passes to track its uses. Its presence does not affect VOPD formation rules
-  // because the rules for src2 and dst are the same. src2X and src2Y should not
-  // be encoded.
-  bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"));
-  bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"));
+  // V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 and V_DUAL_DOT2ACC_F32_BF16 need a
+  // dummy src2 tied to dst for passes to track its uses. Its presence does not
+  // affect VOPD formation rules because the rules for src2 and dst are the
+  // same. src2X and src2Y should not be encoded.
+  bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"), !eq(VDX.Mnemonic, "v_dot2c_f32_bf16"));
+  bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"), !eq(VDY.Mnemonic, "v_dot2c_f32_bf16"));
   string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", "");
   string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", "");
   let Constraints =
@@ -125,16 +125,12 @@ class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
   let FixedSize = 1;
 }
 
-// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
-// not. V_DUAL_DOT2C_F32_BF16 is a legal instruction on GFX12, but
-// V_DOT2C_F32_F16_e32 is not. Since we generate the DUAL form by converting
-// from the normal form we will never generate them.
 defvar VOPDPseudosCommon = [
   "V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
   "V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
   "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32"
 ];
-defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32"];
+defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32", "V_DOT2C_F32_BF16_e32"];
 defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32",
                                  "V_AND_B32_e32"];
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd.s
index 4b8ed488793db7..db05a9f6f11505 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopd.s
@@ -14817,3 +14817,219 @@ v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5
 v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4
 // GFX11: encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v4, v2 :: v_dual_dot2acc_f32_bf16 v6, v1, v3
+// GFX11: encoding: [0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v1, v2 :: v_dual_dot2acc_f32_bf16 v6, v255, v3
+// GFX11: encoding: [0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v255, v2 :: v_dual_dot2acc_f32_bf16 v6, v2, v3
+// GFX11: encoding: [0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v2, v2 :: v_dual_dot2acc_f32_bf16 v6, v3, v3
+// GFX11: encoding: [0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v3, v2 :: v_dual_dot2acc_f32_bf16 v6, v4, v3
+// GFX11: encoding: [0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, s105, v2 :: v_dual_dot2acc_f32_bf16 v6, s1, v3
+// GFX11: encoding: [0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, s1, v2 :: v_dual_dot2acc_f32_bf16 v6, s105, v3
+// GFX11: encoding: [0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, ttmp15, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_lo, v3
+// GFX11: encoding: [0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, exec_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_hi, v3
+// GFX11: encoding: [0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, exec_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, ttmp15, v3
+// GFX11: encoding: [0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, m0, v2 :: v_dual_dot2acc_f32_bf16 v6, m0, v3
+// GFX11: encoding: [0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_lo, v3
+// GFX11: encoding: [0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_hi, v3
+// GFX11: encoding: [0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_dot2acc_f32_bf16 v6, null, v3
+// GFX11: encoding: [0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, src_scc, v2 :: v_dual_dot2acc_f32_bf16 v6, -1, v3
+// GFX11: encoding: [0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, 0.5, v3 :: v_dual_dot2acc_f32_bf16 v6, 0.5, v2
+// GFX11: encoding: [0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, -1, v4 :: v_dual_dot2acc_f32_bf16 v6, src_scc, v5
+// GFX11: encoding: [0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v6, null, v5 :: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v4
+// GFX11: encoding: [0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3
+// GFX11: encoding: [0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3
+// GFX11: encoding: [0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3
+// GFX11: encoding: [0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3
+// GFX11: encoding: [0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3
+// GFX11: encoding: [0x03,0x05,0x48,0xcb,0x04,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3
+// GFX11: encoding: [0x69,0x04,0x48,0xcb,0x01,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3
+// GFX11: encoding: [0x01,0x04,0x48,0xcb,0x69,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3
+// GFX11: encoding: [0x7b,0x04,0x48,0xcb,0x6a,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3
+// GFX11: encoding: [0x7f,0x04,0x48,0xcb,0x6b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3
+// GFX11: encoding: [0x7e,0x04,0x48,0xcb,0x7b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3
+// GFX11: encoding: [0x7d,0x04,0x48,0xcb,0x7d,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3
+// GFX11: encoding: [0x6b,0x04,0x48,0xcb,0x7e,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3
+// GFX11: encoding: [0x6a,0x04,0x48,0xcb,0x7f,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_f32 v6, null, v3
+// GFX11: encoding: [0xff,0x04,0x48,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3
+// GFX11: encoding: [0xfd,0x04,0x48,0xcb,0xc1,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2
+// GFX11: encoding: [0xf0,0x06,0x48,0xcb,0xf0,0x04,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5
+// GFX11: encoding: [0xc1,0x08,0x48,0xcb,0xfd,0x0a,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4
+// GFX11: encoding: [0x7c,0x0a,0x48,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3
+// GFX11: encoding: [0x04,0x05,0x60,0xcb,0x01,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3
+// GFX11: encoding: [0x01,0x05,0x60,0xcb,0xff,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3
+// GFX11: encoding: [0xff,0x05,0x60,0xcb,0x02,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3
+// GFX11: encoding: [0x02,0x05,0x60,0xcb,0x03,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3
+// GFX11: encoding: [0x03,0x05,0x60,0xcb,0x04,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3
+// GFX11: encoding: [0x69,0x04,0x60,0xcb,0x01,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3
+// GFX11: encoding: [0x01,0x04,0x60,0xcb,0x69,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3
+// GFX11: encoding: [0x7b,0x04,0x60,0xcb,0x6a,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3
+// GFX11: encoding: [0x7f,0x04,0x60,0xcb,0x6b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3
+// GFX11: encoding: [0x7e,0x04,0x60,0xcb,0x7b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3
+// GFX11: encoding: [0x7d,0x04,0x60,0xcb,0x7d,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3
+// GFX11: encoding: [0x6b,0x04,0x60,0xcb,0x7e,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3
+// GFX11: encoding: [0x6a,0x04,0x60,0xcb,0x7f,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_nc_u32 v6, null, v3
+// GFX11: encoding: [0xff,0x04,0x60,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3
+// GFX11: encoding: [0xfd,0x04,0x60,0xcb,0xc1,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2
+// GFX11: encoding: [0xf0,0x06,0x60,0xcb,0xf0,0x04,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5
+// GFX11: encoding: [0xc1,0x08,0x60,0xcb,0xfd,0x0a,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4
+// GFX11: encoding: [0x7c,0x0a,0x60,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported.s b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
index c565801d275bb8..640ffb65a76bf5 100644
--- a/llvm/test/MC/AMDGPU/gfx11_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
@@ -2064,3 +2064,6 @@ ds_subrev_u64 v1, v[2:3]
 
 ds_subrev_rtn_u64 v[5:6], v1, v[2:3]
 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_dot2c_f32_bf16 v5, v1, v2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt
index 222718b70f0d74..1bdb7136de95ff 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt
@@ -11111,3 +11111,111 @@
 
 # GFX11: v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
 0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf
+
+# GFX11: v_dual_add_f32 v255, v4, v2 :: v_dual_dot2acc_f32_bf16 v6, v1, v3 ; encoding: [0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff]
+0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, v1, v2 :: v_dual_dot2acc_f32_bf16 v6, v255, v3 ; encoding: [0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff]
+0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, v255, v2 :: v_dual_dot2acc_f32_bf16 v6, v2, v3 ; encoding: [0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff]
+0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, v2, v2 :: v_dual_dot2acc_f32_bf16 v6, v3, v3 ; encoding: [0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff]
+0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, v3, v2 :: v_dual_dot2acc_f32_bf16 v6, v4, v3 ; encoding: [0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff]
+0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, s105, v2 :: v_dual_dot2acc_f32_bf16 v6, s1, v3 ; encoding: [0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff]
+0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, s1, v2 :: v_dual_dot2acc_f32_bf16 v6, s105, v3 ; encoding: [0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff]
+0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff]
+0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff]
+0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff]
+0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, m0, v2 :: v_dual_dot2acc_f32_bf16 v6, m0, v3 ; encoding: [0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff]
+0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff]
+0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff]
+0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_dot2acc_f32_bf16 v6, null, v3 ; encoding: [0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf]
+0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf
+
+# GFX11: v_dual_add_f32 v255, src_scc, v2 :: v_dual_dot2acc_f32_bf16 v6, -1, v3 ; encoding: [0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff]
+0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, 0.5, v3 :: v_dual_dot2acc_f32_bf16 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff]
+0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, -1, v4 :: v_dual_dot2acc_f32_bf16 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff]
+0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff
+
+# GFX11: v_dual_add_f32 v6, null, v5 :: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v4 ; encoding: [0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
+0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff]
+0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff
+
+# GFX11: v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff]
+0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff
+
+# GFX11: v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff]
+0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff
+
+# GFX11: v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff]
+0x02,0x05,0x4...
[truncated]

rampitec

LGTM

Do for GFX12 what llvm#118984 did for GFX11.

#119211) Do for GFX12 what #118984 did for GFX11.

[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16

18a5edd

There is still no codegen support because there is no corresponding v_dot2acc_f32_bf16 instruction.

llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels Dec 6, 2024

jayfoad requested review from Sisyph, broxigarchen, kosarev and rampitec December 6, 2024 15:13

rampitec approved these changes Dec 6, 2024

View reviewed changes

jayfoad merged commit f9d6d46 into llvm:main Dec 9, 2024
11 checks passed

jayfoad deleted the v-dual-dot2acc-f32-bf16 branch December 9, 2024 09:47

jayfoad added a commit to jayfoad/llvm-project that referenced this pull request Dec 9, 2024

[AMDGPU] Add GFX12 assembler/disassembler support for v_dual_dot2acc_*

f0392b1

Do for GFX12 what llvm#118984 did for GFX11.

jayfoad mentioned this pull request Dec 9, 2024

[AMDGPU] Add GFX12 assembler/disassembler support for v_dual_dot2acc_* #119211

Merged

jayfoad added a commit that referenced this pull request Dec 9, 2024

[AMDGPU] Add GFX12 assembler/disassembler support for v_dual_dot2acc_* (

d1cf86f

#119211) Do for GFX12 what #118984 did for GFX11.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16 #118984

[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16 #118984

Uh oh!

jayfoad commented Dec 6, 2024 •

edited

Loading

Uh oh!

llvmbot commented Dec 6, 2024 •

edited

Loading

Uh oh!

rampitec left a comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16 #118984

[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16 #118984

Uh oh!

Conversation

jayfoad commented Dec 6, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Dec 6, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

rampitec left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

jayfoad commented Dec 6, 2024 •

edited

Loading

llvmbot commented Dec 6, 2024 •

edited

Loading