[AMDGPU] gfx1251 VOP2 dpp support #159641

rampitec · 2025-09-18T19:59:28Z

No description provided.

rampitec · 2025-09-18T19:59:48Z

[AMDGPU] gfx1251 VOP3 dpp support #159654
[AMDGPU] gfx1251 VOP2 dpp support #159641 👈 (View in Graphite)
[AMDGPU] gfx1251 VOP1 dpp support #159637
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-09-18T20:06:59Z

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Patch is 22.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159641.diff

5 Files Affected:

(modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+45-34)
(modified) llvm/test/CodeGen/AMDGPU/dpp_combine.ll (+5-1)
(added) llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s (+74)
(added) llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s (+106)
(added) llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop2_dpp16.txt (+37)

diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 46a1a4bf1ab4a..37d92bc5076de 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -287,10 +287,14 @@ multiclass VOP2bInst <string opName,
       def _e64 : VOP3InstBase <opName, P, node, 1>,
                  Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
 
-      let SubtargetPredicate = isGFX11Plus in {
-        if P.HasExtVOP3DPP then
-          def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
-      } // End SubtargetPredicate = isGFX11Plus
+      if P.HasExtVOP3DPP then
+        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+          let SubtargetPredicate = isGFX11Plus;
+        }
+      else if P.HasExt64BitDPP then
+        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+          let OtherPredicates = [HasDPALU_DPP];
+      }
     }
 }
 
@@ -345,10 +349,14 @@ multiclass
                  VOPD_Component<VOPDOp, VOPDName>;
     }
 
-    let SubtargetPredicate = isGFX11Plus in {
-      if P.HasExtVOP3DPP then
-        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
-    } // End SubtargetPredicate = isGFX11Plus
+    if P.HasExtVOP3DPP then
+      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+        let SubtargetPredicate = isGFX11Plus;
+      }
+    else if P.HasExt64BitDPP then
+      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+        let OtherPredicates = [HasDPALU_DPP];
+      }
   }
 }
 
@@ -1607,8 +1615,9 @@ multiclass VOP2_Real_dpp<GFXGen Gen, bits<6> op> {
 }
 
 multiclass VOP2_Real_dpp8<GFXGen Gen, bits<6> op> {
-  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
-  def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(NAME#"_e32"), Gen>;
+  defvar ps = !cast<VOP2_Pseudo>(NAME#"_e32");
+  if !and(ps.Pfl.HasExtDPP, !not(ps.Pfl.HasExt64BitDPP)) then
+    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen>;
 }
 
 //===------------------------- VOP2 (with name) -------------------------===//
@@ -1643,10 +1652,10 @@ multiclass VOP2_Real_dpp_with_name<GFXGen Gen, bits<6> op, string opName,
 multiclass VOP2_Real_dpp8_with_name<GFXGen Gen, bits<6> op, string opName,
                                     string asmName> {
   defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
-  if ps.Pfl.HasExtDPP then
-  def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen> {
-    let AsmString = asmName # ps.Pfl.AsmDPP8;
-  }
+  if !and(ps.Pfl.HasExtDPP, !not(ps.Pfl.HasExt64BitDPP)) then
+    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen> {
+      let AsmString = asmName # ps.Pfl.AsmDPP8;
+    }
 }
 
 //===------------------------------ VOP2be ------------------------------===//
@@ -1687,32 +1696,32 @@ multiclass VOP2be_Real_dpp<GFXGen Gen, bits<6> op, string opName, string asmName
     }
 }
 multiclass VOP2be_Real_dpp8<GFXGen Gen, bits<6> op, string opName, string asmName> {
-  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
+  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+  if !and(ps.Pfl.HasExtDPP, !not(ps.Pfl.HasExt64BitDPP)) then {
   def _dpp8#Gen.Suffix :
-    VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(opName#"_e32"), Gen> {
-      string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+    VOP2_DPP8_Gen<op, ps, Gen> {
+      string AsmDPP8 = ps.Pfl.AsmDPP8;
       let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
     }
-  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
   def _dpp8_w32#Gen.Suffix :
-    VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
-      string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+    VOP2_DPP8<op, ps> {
+      string AsmDPP8 = ps.Pfl.AsmDPP8;
       let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
       let isAsmParserOnly = 1;
       let WaveSizePredicate = isWave32;
       let AssemblerPredicate = Gen.AssemblerPredicate;
       let DecoderNamespace = Gen.DecoderNamespace;
     }
-  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
   def _dpp8_w64#Gen.Suffix :
-    VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
-      string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+    VOP2_DPP8<op, ps> {
+      string AsmDPP8 = ps.Pfl.AsmDPP8;
       let AsmString = asmName # AsmDPP8;
       let isAsmParserOnly = 1;
       let WaveSizePredicate = isWave64;
       let AssemblerPredicate = Gen.AssemblerPredicate;
       let DecoderNamespace = Gen.DecoderNamespace;
     }
+  }
 }
 
 // We don't want to override separate decoderNamespaces within these
@@ -1777,9 +1786,11 @@ multiclass VOP2_Real_NO_DPP_with_name<GFXGen Gen, bits<6> op, string opName,
   }
 }
 
-multiclass VOP2_Real_NO_DPP_with_alias<GFXGen Gen, bits<6> op, string alias> {
+multiclass VOP2_Real_with_DPP16_with_alias<GFXGen Gen, bits<6> op, string alias> {
   defm NAME : VOP2_Real_e32<Gen, op>,
-              VOP2_Real_e64<Gen, op>;
+              VOP2_Real_dpp<Gen, op>,
+              VOP2_Real_e64<Gen, op>,
+              VOP3_Real_dpp_Base<Gen, {0, 1, 0, 0, op{5-0}}>;
   def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<alias, NAME> {
     let AssemblerPredicate = Gen.AssemblerPredicate;
   }
@@ -1808,6 +1819,9 @@ multiclass VOP2_Real_FULL_t16_gfx12<bits<6> op, string opName,
   }
 }
 
+multiclass VOP2_Real_with_DPP16_with_alias_gfx12<bits<6> op, string alias> :
+  VOP2_Real_with_DPP16_with_alias<GFX12Gen, op, alias>;
+
 multiclass VOP2_Real_FULL_t16_and_fake16_gfx12<bits<6> op, string opName,
                                                string asmName, string alias> {
   defm _t16: VOP2_Real_FULL_t16_gfx12<op, opName#"_t16", asmName, alias>;
@@ -1818,14 +1832,11 @@ multiclass VOP2_Real_NO_DPP_with_name_gfx12<bits<6> op, string opName,
                                             string asmName> :
   VOP2_Real_NO_DPP_with_name<GFX12Gen, op, opName, asmName>;
 
-multiclass VOP2_Real_NO_DPP_with_alias_gfx12<bits<6> op, string alias> :
-  VOP2_Real_NO_DPP_with_alias<GFX12Gen, op, alias>;
-
-defm V_ADD_F64     : VOP2_Real_NO_DPP_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">;
-defm V_MUL_F64     : VOP2_Real_NO_DPP_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">;
-defm V_LSHLREV_B64 : VOP2_Real_NO_DPP_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">;
-defm V_MIN_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00d, "v_min_f64">;
-defm V_MAX_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00e, "v_max_f64">;
+defm V_ADD_F64     : VOP2_Real_FULL_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">;
+defm V_MUL_F64     : VOP2_Real_FULL_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">;
+defm V_LSHLREV_B64 : VOP2_Real_FULL_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">;
+defm V_MIN_NUM_F64 : VOP2_Real_with_DPP16_with_alias_gfx12<0x00d, "v_min_f64">;
+defm V_MAX_NUM_F64 : VOP2_Real_with_DPP16_with_alias_gfx12<0x00e, "v_max_f64">;
 
 defm V_CNDMASK_B32 : VOP2e_Real_gfx12<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
 defm V_ADD_CO_CI_U32 :
@@ -2776,7 +2787,7 @@ let DecoderNamespace = "GFX90A" in {
   }
 } // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A"
 
-let SubtargetPredicate = HasFmacF64Inst in {
+let SubtargetPredicate = HasFmacF64Inst, OtherPredicates = [isGFX9Only] in {
   defm V_FMAC_F64       : VOP2_Real_e32e64_gfx90a <0x4>;
 } // End SubtargetPredicate = HasFmacF64Inst
 
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
index 539485d19a2b9..a3251bdfafebf 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
@@ -4,6 +4,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -mattr=+real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16
 
 ; GCN-LABEL: {{^}}dpp_add:
 ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]],
@@ -49,7 +51,9 @@ define amdgpu_kernel void @dpp_fadd(ptr addrspace(1) %arg) {
   ret void
 }
 
-; Fails to combine because v_mul_lo_u32 has no e32 or dpp form.
+; Fails to combine prior to gfx1251 because v_mul_lo_u32 has no e32 or dpp form.
+; Fails to combine on gfx1251 because DPP control value is invalid for DP DPP and v_mul_lo_u32 is
+; classified as DP DPP.
 ; GCN-LABEL: {{^}}dpp_mul:
 ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]],
 ; GCN: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]]
diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s
new file mode 100644
index 0000000000000..38bbc69fb3a72
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s
@@ -0,0 +1,74 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding < %s | FileCheck --check-prefix=GFX1251 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x53,0x05,0x30]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX1250-ERR-NEXT:{{^}}                                    ^
+
+v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x50,0x01,0xff]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250-ERR-NEXT:{{^}}                                    ^
+
+v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x5f,0x01,0x01]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250-ERR-NEXT:{{^}}                                    ^
+
+v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x53,0x05,0x30]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX1250-ERR-NEXT:{{^}}                                    ^
+
+v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x50,0x01,0xff]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250-ERR-NEXT:{{^}}                                    ^
+
+v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x5f,0x01,0x01]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250-ERR-NEXT:{{^}}                                    ^
+
+v_fmac_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1251: v_fmac_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x2e,0x02,0x51,0x01,0xff]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_fmac_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1250-ERR-NEXT:{{^}}                                  ^
+
+v_add_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1251: v_add_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x04,0x02,0x51,0x01,0xff]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_add_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1250-ERR-NEXT:{{^}}                                 ^
+
+v_mul_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1251: v_mul_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x0c,0x02,0x51,0x01,0xff]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_mul_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1250-ERR-NEXT:{{^}}                                 ^
+
+v_max_num_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1251: v_max_num_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x1c,0x02,0x51,0x01,0xff]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_max_num_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1250-ERR-NEXT:{{^}}                                     ^
+
+v_min_num_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1251: v_min_num_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x1a,0x02,0x51,0x01,0xff]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_min_num_f64 v[4:5], v[2:3], v[4:5] row_share:1
+// GFX1250-ERR-NEXT:{{^}}                                     ^
+
+v_lshlrev_b64 v[4:5], v2, v[4:5] row_share:1
+// GFX1251: v_lshlrev_b64_dpp v[4:5], v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x3e,0x02,0x51,0x01,0xff]
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_lshlrev_b64 v[4:5], v2, v[4:5] row_share:1
+// GFX1250-ERR-NEXT:{{^}}                                 ^
diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s
new file mode 100644
index 0000000000000..99d781d1e0fa1
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s
@@ -0,0 +1,106 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1251-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_add_nc_u64 v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_add_nc_u64 v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                    ^
+
+v_sub_nc_u64 v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_sub_nc_u64 v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                    ^
+
+v_fmac_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_fmac_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                  ^
+
+v_add_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_add_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                 ^
+
+v_mul_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_mul_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                 ^
+
+v_max_num_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_max_num_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                     ^
+
+v_min_num_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_min_num_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                     ^
+
+v_lshlrev_b64 v[4:5], v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_lshlrev_b64 v[4:5], v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                 ^
+
+v_fmamk_f64 v[4:5], v[2:3], 123.0, v[6:7] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_fmamk_f64 v[4:5], v[2:3], 123.0, v[6:7] dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                          ^
+
+v_fmaak_f64 v[4:5], v[2:3], v[6:7], 123.0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_fmaak_f64 v[4:5], v[2:3], v[6:7], 123.0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                          ^
+
+v_add_nc_u64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
+// GFX1251-ERR-NEXT:{{^}}v_add_nc_u64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                    ^
+
+v_sub_nc_u64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
+// GFX1251-ERR-NEXT:{{^}}v_sub_nc_u64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                    ^
+
+v_fmac_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
+// GFX1251-ERR-NEXT:{{^}}v_fmac_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                  ^
+
+v_add_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
+// GFX1251-ERR-NEXT:{{^}}v_add_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                 ^
+
+v_mul_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
+// GFX1251-ERR-NEXT:{{^}}v_mul_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                 ^
+
+v_max_num_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
+// GFX1251-ERR-NEXT:{{^}}v_max_num_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                     ^
+
+v_min_num_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
+// GFX1251-ERR-NEXT:{{^}}v_min_num_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                     ^
+
+v_lshlrev_b64 v[4:5], v2, v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
+// GFX1251-ERR-NEXT:{{^}}v_lshlrev_b64 v[4:5], v2, v[4:5] quad_perm:[3,2,1,0]
+// GFX1251-ERR-NEXT:{{^}}                                 ^
+
+v_mul_u64 v[2:3], v[4:5], v[6:7] row_share:1
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_mul_u64 v[2:3], v[4:5], v[6:7] row_share:1
+// GFX1251-ERR-NEXT:{{^}}                                 ^
+
+v_fmamk_f64 v[4:5], v[2:3], 123.0, v[6:7] row_share:1
+// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1251-ERR-NEXT:{{^}}v_fmamk_f64 v[4:5], v[2:3], 123.0, v[6:7] row_share:1
+// GFX1251-ERR-NEXT:{{^}}                                          ^
+
+v_fmaak_f64 v[4:5], v[2:3], v[6:7], 123.0 row_share:1
+// GFX1251-ER...
[truncated]

…p_support

shiltian · 2025-09-18T22:26:55Z

Is the stack messed up?

rampitec · 2025-09-18T22:30:32Z

Is the stack messed up?

Hm... No? It was briefly messed up after VOP1 landed, but it now contains only the VOP2 changes.

shiltian · 2025-09-18T22:32:26Z

Okay, but the "Commits" tab still shows three commits, one of which is the VOP1 commit.

rampitec · 2025-09-18T22:33:53Z

Okay, but the "Commits" tab still shows three commits, one of which is the VOP1 commit.

Files changed tab looks correct though.

rampitec added 2 commits September 18, 2025 12:35

[AMDGPU] gfx1251 VOP1 dpp support

dfcc9d2

[AMDGPU] gfx1251 VOP2 dpp support

344bfe1

rampitec mentioned this pull request Sep 18, 2025

[AMDGPU] gfx1251 VOP1 dpp support #159637

Merged

rampitec requested a review from shiltian September 18, 2025 20:06

rampitec marked this pull request as ready for review September 18, 2025 20:06

llvmbot added the backend:AMDGPU label Sep 18, 2025

Base automatically changed from users/rampitec/09-18-_amdgpu_gfx1251_vop1_dpp_support to main September 18, 2025 20:42

Merge branch 'main' into users/rampitec/09-18-_amdgpu_gfx1251_vop2_dpp_support

ace386e

rampitec mentioned this pull request Sep 18, 2025

[AMDGPU] gfx1251 VOP3 dpp support #159654

Merged

shiltian approved these changes Sep 18, 2025

View reviewed changes

rampitec merged commit 8cfbace into main Sep 18, 2025
9 checks passed

rampitec deleted the users/rampitec/09-18-_amdgpu_gfx1251_vop2_dpp_support branch September 18, 2025 22:38

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] gfx1251 VOP2 dpp support #159641

[AMDGPU] gfx1251 VOP2 dpp support #159641

Uh oh!

rampitec commented Sep 18, 2025

Uh oh!

rampitec commented Sep 18, 2025 •

edited

Loading

Uh oh!

llvmbot commented Sep 18, 2025

Uh oh!

shiltian commented Sep 18, 2025

Uh oh!

rampitec commented Sep 18, 2025

Uh oh!

shiltian commented Sep 18, 2025

Uh oh!

rampitec commented Sep 18, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

[AMDGPU] gfx1251 VOP2 dpp support #159641

[AMDGPU] gfx1251 VOP2 dpp support #159641

Uh oh!

Conversation

rampitec commented Sep 18, 2025

Uh oh!

rampitec commented Sep 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 18, 2025

Uh oh!

shiltian commented Sep 18, 2025

Uh oh!

rampitec commented Sep 18, 2025

Uh oh!

shiltian commented Sep 18, 2025

Uh oh!

rampitec commented Sep 18, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

rampitec commented Sep 18, 2025 •

edited

Loading