[AMDGPU][MC] Allow dpp in v_dot2_f32_bf16 for GFX11 and 12

jwanggit86 · jwanggit86 · commit 1cbabd48b5b4 · 2025-06-02T10:52:34.000-07:00
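Allow the DPP (DPP16 and DPP8) forms of v_dot2_f32_bf16 on GFX11 and GFX12 by defining its real instructions with VOP3P_Realtriple for each generation instead of VOP3P_Real_gfx11_gfx12, and add assembler tests for the new _e64_dpp encodings.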
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1878,6 +1878,8 @@ defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>;
 defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>;
 defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>;
 
+defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX12Gen, 0x1a>;
+
 //===----------------------------------------------------------------------===//
 // GFX11
 //===----------------------------------------------------------------------===//
@@ -1887,7 +1889,7 @@ multiclass VOP3P_Real_gfx11_gfx12<bits<8> op> :
 
 defm V_DOT4_I32_IU8  : VOP3P_Real_gfx11_gfx12<0x16>;
 defm V_DOT8_I32_IU4  : VOP3P_Real_gfx11_gfx12<0x18>;
-defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11_gfx12<0x1a>;
+defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX11Gen, 0x1a>;
 
 let AssemblerPredicate = isGFX11Plus in {
   def : AMDGPUMnemonicAlias<"v_dot4_i32_i8", "v_dot4_i32_iu8">;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
@@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
 v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,1,0] neg_hi:[0,1,0] clamp
 // GFX11: [0xff,0xc2,0x1a,0xcc,0xff,0xd6,0xc0,0x5b,0x0b,0xfe,0x00,0x00]
 
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]
+
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]
+
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]
+
 v_dot2_f32_f16 v5, v1, v2, s3
 // GFX11: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
@@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
 v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,0,0] neg_hi:[0,0,0] clamp
 // GFX12: [0xff,0xc0,0x1a,0xcc,0xff,0xd6,0xc0,0x1b,0x0b,0xfe,0x00,0x00]
 
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX12: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]
+
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX12: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]
+
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]
+
 v_dot2_f32_f16 v5, v1, v2, s3
 // GFX12: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]