-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU] Add assembler/disassembler support for v_dual_dot2acc_f32_bf16 #118984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
There is still no codegen support because there is no corresponding v_dot2acc_f32_bf16 instruction.
|
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) Changes: There is still no codegen support because there is no corresponding v_dot2acc_f32_bf16 instruction. Patch is 22.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118984.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 128c7756191181..17fc1493ffdf97 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1190,7 +1190,7 @@ let Constraints = "$vdst = $src2",
defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
let SubtargetPredicate = HasDot13Insts in
- defm V_DOT2C_F32_BF16 : VOP2Inst<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16>;
+ defm V_DOT2C_F32_BF16 : VOP2Inst_VOPD<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16, 0xd, "v_dot2acc_f32_bf16">;
}
let AddedComplexity = 30 in {
diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
index c6af3d67c56007..b89cd8b914f8b9 100644
--- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -87,12 +87,12 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
let mayRaiseFPException = ReadsModeReg;
- // V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for
- // passes to track its uses. Its presence does not affect VOPD formation rules
- // because the rules for src2 and dst are the same. src2X and src2Y should not
- // be encoded.
- bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"));
- bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"));
+ // V_DUAL_FMAC, V_DUAL_DOT2ACC_F32_F16 and V_DUAL_DOT2ACC_F32_BF16 need a
+ // dummy src2 tied to dst for passes to track its uses. Its presence does not
+ // affect VOPD formation rules because the rules for src2 and dst are the
+ // same. src2X and src2Y should not be encoded.
+ bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"), !eq(VDX.Mnemonic, "v_dot2c_f32_bf16"));
+ bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"), !eq(VDY.Mnemonic, "v_dot2c_f32_bf16"));
string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", "");
string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", "");
let Constraints =
@@ -125,16 +125,12 @@ class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
let FixedSize = 1;
}
-// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
-// not. V_DUAL_DOT2C_F32_BF16 is a legal instruction on GFX12, but
-// V_DOT2C_F32_F16_e32 is not. Since we generate the DUAL form by converting
-// from the normal form we will never generate them.
defvar VOPDPseudosCommon = [
"V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
"V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
"V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32"
];
-defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32"];
+defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32", "V_DOT2C_F32_BF16_e32"];
defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32",
"V_AND_B32_e32"];
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd.s
index 4b8ed488793db7..db05a9f6f11505 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopd.s
@@ -14817,3 +14817,219 @@ v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5
v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4
// GFX11: encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v4, v2 :: v_dual_dot2acc_f32_bf16 v6, v1, v3
+// GFX11: encoding: [0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v1, v2 :: v_dual_dot2acc_f32_bf16 v6, v255, v3
+// GFX11: encoding: [0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v255, v2 :: v_dual_dot2acc_f32_bf16 v6, v2, v3
+// GFX11: encoding: [0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v2, v2 :: v_dual_dot2acc_f32_bf16 v6, v3, v3
+// GFX11: encoding: [0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, v3, v2 :: v_dual_dot2acc_f32_bf16 v6, v4, v3
+// GFX11: encoding: [0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, s105, v2 :: v_dual_dot2acc_f32_bf16 v6, s1, v3
+// GFX11: encoding: [0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, s1, v2 :: v_dual_dot2acc_f32_bf16 v6, s105, v3
+// GFX11: encoding: [0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, ttmp15, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_lo, v3
+// GFX11: encoding: [0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, exec_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_hi, v3
+// GFX11: encoding: [0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, exec_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, ttmp15, v3
+// GFX11: encoding: [0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, m0, v2 :: v_dual_dot2acc_f32_bf16 v6, m0, v3
+// GFX11: encoding: [0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_lo, v3
+// GFX11: encoding: [0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_hi, v3
+// GFX11: encoding: [0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_dot2acc_f32_bf16 v6, null, v3
+// GFX11: encoding: [0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, src_scc, v2 :: v_dual_dot2acc_f32_bf16 v6, -1, v3
+// GFX11: encoding: [0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, 0.5, v3 :: v_dual_dot2acc_f32_bf16 v6, 0.5, v2
+// GFX11: encoding: [0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v255, -1, v4 :: v_dual_dot2acc_f32_bf16 v6, src_scc, v5
+// GFX11: encoding: [0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_add_f32 v6, null, v5 :: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v4
+// GFX11: encoding: [0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3
+// GFX11: encoding: [0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3
+// GFX11: encoding: [0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3
+// GFX11: encoding: [0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3
+// GFX11: encoding: [0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3
+// GFX11: encoding: [0x03,0x05,0x48,0xcb,0x04,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3
+// GFX11: encoding: [0x69,0x04,0x48,0xcb,0x01,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3
+// GFX11: encoding: [0x01,0x04,0x48,0xcb,0x69,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3
+// GFX11: encoding: [0x7b,0x04,0x48,0xcb,0x6a,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3
+// GFX11: encoding: [0x7f,0x04,0x48,0xcb,0x6b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3
+// GFX11: encoding: [0x7e,0x04,0x48,0xcb,0x7b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3
+// GFX11: encoding: [0x7d,0x04,0x48,0xcb,0x7d,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3
+// GFX11: encoding: [0x6b,0x04,0x48,0xcb,0x7e,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3
+// GFX11: encoding: [0x6a,0x04,0x48,0xcb,0x7f,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_f32 v6, null, v3
+// GFX11: encoding: [0xff,0x04,0x48,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3
+// GFX11: encoding: [0xfd,0x04,0x48,0xcb,0xc1,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2
+// GFX11: encoding: [0xf0,0x06,0x48,0xcb,0xf0,0x04,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5
+// GFX11: encoding: [0xc1,0x08,0x48,0xcb,0xfd,0x0a,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4
+// GFX11: encoding: [0x7c,0x0a,0x48,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3
+// GFX11: encoding: [0x04,0x05,0x60,0xcb,0x01,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3
+// GFX11: encoding: [0x01,0x05,0x60,0xcb,0xff,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3
+// GFX11: encoding: [0xff,0x05,0x60,0xcb,0x02,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3
+// GFX11: encoding: [0x02,0x05,0x60,0xcb,0x03,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3
+// GFX11: encoding: [0x03,0x05,0x60,0xcb,0x04,0x07,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3
+// GFX11: encoding: [0x69,0x04,0x60,0xcb,0x01,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3
+// GFX11: encoding: [0x01,0x04,0x60,0xcb,0x69,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3
+// GFX11: encoding: [0x7b,0x04,0x60,0xcb,0x6a,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3
+// GFX11: encoding: [0x7f,0x04,0x60,0xcb,0x6b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3
+// GFX11: encoding: [0x7e,0x04,0x60,0xcb,0x7b,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3
+// GFX11: encoding: [0x7d,0x04,0x60,0xcb,0x7d,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3
+// GFX11: encoding: [0x6b,0x04,0x60,0xcb,0x7e,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3
+// GFX11: encoding: [0x6a,0x04,0x60,0xcb,0x7f,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v2 :: v_dual_add_nc_u32 v6, null, v3
+// GFX11: encoding: [0xff,0x04,0x60,0xcb,0x7c,0x06,0x06,0xff,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3
+// GFX11: encoding: [0xfd,0x04,0x60,0xcb,0xc1,0x06,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2
+// GFX11: encoding: [0xf0,0x06,0x60,0xcb,0xf0,0x04,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5
+// GFX11: encoding: [0xc1,0x08,0x60,0xcb,0xfd,0x0a,0x06,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
+
+v_dual_dot2acc_f32_bf16 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4
+// GFX11: encoding: [0x7c,0x0a,0x60,0xcb,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires wavesize=32
diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported.s b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
index c565801d275bb8..640ffb65a76bf5 100644
--- a/llvm/test/MC/AMDGPU/gfx11_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
@@ -2064,3 +2064,6 @@ ds_subrev_u64 v1, v[2:3]
ds_subrev_rtn_u64 v[5:6], v1, v[2:3]
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_dot2c_f32_bf16 v5, v1, v2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt
index 222718b70f0d74..1bdb7136de95ff 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopd.txt
@@ -11111,3 +11111,111 @@
# GFX11: v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf]
0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf
+
+# GFX11: v_dual_add_f32 v255, v4, v2 :: v_dual_dot2acc_f32_bf16 v6, v1, v3 ; encoding: [0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff]
+0x04,0x05,0x1a,0xc9,0x01,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, v1, v2 :: v_dual_dot2acc_f32_bf16 v6, v255, v3 ; encoding: [0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff]
+0x01,0x05,0x1a,0xc9,0xff,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, v255, v2 :: v_dual_dot2acc_f32_bf16 v6, v2, v3 ; encoding: [0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff]
+0xff,0x05,0x1a,0xc9,0x02,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, v2, v2 :: v_dual_dot2acc_f32_bf16 v6, v3, v3 ; encoding: [0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff]
+0x02,0x05,0x1a,0xc9,0x03,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, v3, v2 :: v_dual_dot2acc_f32_bf16 v6, v4, v3 ; encoding: [0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff]
+0x03,0x05,0x1a,0xc9,0x04,0x07,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, s105, v2 :: v_dual_dot2acc_f32_bf16 v6, s1, v3 ; encoding: [0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff]
+0x69,0x04,0x1a,0xc9,0x01,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, s1, v2 :: v_dual_dot2acc_f32_bf16 v6, s105, v3 ; encoding: [0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff]
+0x01,0x04,0x1a,0xc9,0x69,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff]
+0x7b,0x04,0x1a,0xc9,0x6a,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff]
+0x7f,0x04,0x1a,0xc9,0x6b,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff]
+0x7e,0x04,0x1a,0xc9,0x7b,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, m0, v2 :: v_dual_dot2acc_f32_bf16 v6, m0, v3 ; encoding: [0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff]
+0x7d,0x04,0x1a,0xc9,0x7d,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff]
+0x6b,0x04,0x1a,0xc9,0x7e,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_dot2acc_f32_bf16 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff]
+0x6a,0x04,0x1a,0xc9,0x7f,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_dot2acc_f32_bf16 v6, null, v3 ; encoding: [0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf]
+0xff,0x04,0x1a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf
+
+# GFX11: v_dual_add_f32 v255, src_scc, v2 :: v_dual_dot2acc_f32_bf16 v6, -1, v3 ; encoding: [0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff]
+0xfd,0x04,0x1a,0xc9,0xc1,0x06,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, 0.5, v3 :: v_dual_dot2acc_f32_bf16 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff]
+0xf0,0x06,0x1a,0xc9,0xf0,0x04,0x06,0xff
+
+# GFX11: v_dual_add_f32 v255, -1, v4 :: v_dual_dot2acc_f32_bf16 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff]
+0xc1,0x08,0x1a,0xc9,0xfd,0x0a,0x06,0xff
+
+# GFX11: v_dual_add_f32 v6, null, v5 :: v_dual_dot2acc_f32_bf16 v255, 0xfe0b, v4 ; encoding: [0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
+0x7c,0x0a,0x1a,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_dual_dot2acc_f32_bf16 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff]
+0x04,0x05,0x48,0xcb,0x01,0x07,0x06,0xff
+
+# GFX11: v_dual_dot2acc_f32_bf16 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff]
+0x01,0x05,0x48,0xcb,0xff,0x07,0x06,0xff
+
+# GFX11: v_dual_dot2acc_f32_bf16 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff]
+0xff,0x05,0x48,0xcb,0x02,0x07,0x06,0xff
+
+# GFX11: v_dual_dot2acc_f32_bf16 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xcb,0x03,0x07,0x06,0xff]
+0x02,0x05,0x4...
[truncated]
|
rampitec
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Do for GFX12 what llvm#118984 did for GFX11.
There is still no codegen support because the corresponding
v_dot2c_f32_bf16 instruction is not supported on GFX11.