[AMDGPU] Add gfx1250 v_fmac_f64 implementation #148725

rampitec · 2025-07-14T21:09:03Z

No description provided.

rampitec · 2025-07-14T21:09:20Z

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-07-14T21:10:36Z

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Patch is 23.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148725.diff

5 Files Affected:

(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+1)
(modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+14-5)
(modified) llvm/test/CodeGen/AMDGPU/fma.f64.ll (+23-22)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s (+152)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt (+111)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index a32078cc403e7..9df2bdededa13 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -639,6 +639,7 @@ bool isMAC(unsigned Opc) {
          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
          Opc == AMDGPU::V_MAC_F16_e64_vi ||
          Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
+         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 25c6cbc3e1ab5..030a6e1e978c1 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -175,10 +175,14 @@ multiclass VOP2Inst_e64<string opName,
     def _e64 : VOP3InstBase <opName, P, node, 1>,
                Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
 
-    let SubtargetPredicate = isGFX11Plus in {
-      if P.HasExtVOP3DPP then
-        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
-    } // End SubtargetPredicate = isGFX11Plus
+    if P.HasExtVOP3DPP then
+      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+        let SubtargetPredicate = isGFX11Plus;
+      }
+    else if P.HasExt64BitDPP then
+      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+        let OtherPredicates = [HasDPALU_DPP];
+      }
 }
 
 multiclass VOP2Inst_e64_VOPD<string opName,
@@ -1492,7 +1496,9 @@ class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
     VOP2_DPP<op, ps, opName, p, 1> {
   let AssemblerPredicate = HasDPP16;
   let SubtargetPredicate = ps.SubtargetPredicate;
-  let OtherPredicates = ps.OtherPredicates;
+  let OtherPredicates = !listconcat(ps.OtherPredicates,
+      !if(p.HasExt64BitDPP, [HasDPALU_DPP], []),
+      !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []));
 }
 
 class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
@@ -1832,6 +1838,9 @@ let SubtargetPredicate = isGFX12Plus in {
     V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">;
 } // End SubtargetPredicate = isGFX12Plus
 
+let SubtargetPredicate = HasFmacF64Inst in
+defm V_FMAC_F64 : VOP2_Real_FULL<GFX12Gen, 0x17>;
+
 defm V_FMAMK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x23>;
 defm V_FMAAK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x24>;
 
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f64.ll b/llvm/test/CodeGen/AMDGPU/fma.f64.ll
index 99f7d4da685d6..e4488258dcf88 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f64.ll
@@ -1,7 +1,8 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX90A %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMAC_F64 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=FUNC,GCN,FMAC_F64 %s
 
 declare double @llvm.fma.f64(double, double, double) nounwind readnone
 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
@@ -9,8 +10,8 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) n
 declare double @llvm.fabs.f64(double) nounwind readnone
 
 ; FUNC-LABEL: {{^}}fma_f64:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                      ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
    %r0 = load double, ptr addrspace(1) %in1
@@ -22,10 +23,10 @@ define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
 }
 
 ; FUNC-LABEL: {{^}}fma_v2f64:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                        ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
    %r0 = load <2 x double>, ptr addrspace(1) %in1
@@ -37,14 +38,14 @@ define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in
 }
 
 ; FUNC-LABEL: {{^}}fma_v4f64:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                        ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
    %r0 = load <4 x double>, ptr addrspace(1) %in1
@@ -176,8 +177,8 @@ define amdgpu_kernel void @fma_f64_abs_neg_src2(ptr addrspace(1) %out, ptr addrs
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_lit_src0:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out,
                      ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
    %r1 = load double, ptr addrspace(1) %in2
@@ -188,8 +189,8 @@ define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out,
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_lit_src1:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_f64_lit_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                      ptr addrspace(1) %in3) {
    %r0 = load double, ptr addrspace(1) %in1
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
index cc14e4caf851e..20bc578605b8c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
@@ -2,6 +2,158 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200-ERR --implicit-check-not=error: %s
 
+v_fmac_f64 v[4:5], v[2:3], v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[4:5]   ; encoding: [0x02,0x09,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[254:255], v[2:3], v[4:5]
+// GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x2f]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[254:255], v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], vcc, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], vcc, v[4:5]      ; encoding: [0x6a,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], exec, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], exec, v[4:5]     ; encoding: [0x7e,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0, v[4:5]        ; encoding: [0x80,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -1, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], -1, v[4:5]       ; encoding: [0xc1,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0.5, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[4:5]      ; encoding: [0xf0,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -4.0, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[4:5]     ; encoding: [0xf7,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0xaf123456, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0x3f717273, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[254:255]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[8:9]   ; encoding: [0x02,0x11,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[254:255], v[2:3], v[8:9]
+// GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[8:9] ; encoding: [0x02,0x11,0xfc,0x2f]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[254:255], v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[8:9] ; encoding: [0xfe,0x11,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], vcc, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], vcc, v[8:9]      ; encoding: [0x6a,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], exec, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], exec, v[8:9]     ; encoding: [0x7e,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0, v[8:9]        ; encoding: [0x80,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -1, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], -1, v[8:9]       ; encoding: [0xc1,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0.5, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[8:9]      ; encoding: [0xf0,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -4.0, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[8:9]     ; encoding: [0xf7,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[254:255]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], vcc
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], vcc      ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xd5,0x00,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], exec
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], exec     ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xfd,0x00,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], 0
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], 0        ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x01,0x01,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], -1
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -1       ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x83,0x01,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], 0.5
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], 0.5      ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xe1,0x01,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], -4.0
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -4.0     ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xef,0x01,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -v[2:3], v[8:9]
+// GFX1250: v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9]  ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x20]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], -v[8:9]
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9]  ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x40]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -v[2:3], -v[8:9]
+// GFX1250: v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x60]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], |v[2:3]|, v[8:9]
+// GFX1250: v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] ; encoding: [0x04,0x01,0x17,0xd5,0x02,0x11,0x02,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], |v[8:9]|
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| ; encoding: [0x04,0x02,0x17,0xd5,0x02,0x11,0x02,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], |v[2:3]|, |v[8:9]|
+// GFX1250: v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| ; encoding: [0x04,0x03,0x17,0xd5,0x02,0x11,0x02,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9] clamp
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp ; encoding: [0x04,0x80,0x17,0xd5,0x02,0x11,0x02,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9] mul:2
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x08]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9] mul:4
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x10]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9] div:2
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x18]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3]
 // GFX1250: v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3] ; encoding: [0x04,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
index f0fcddb06599f..c1213f2d9ec0d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
@@ -1,6 +1,117 @@
 # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
 
+0x02,0x09,0xfc,0x2f
+# GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x2f]
+
+0x02,0x11,0xfc,0x2f
+# GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[8:9] ; encoding: [0x02,0x11,0xfc,0x2f]
+
+0xc1,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], -1, v[4:5]       ; encoding: [0xc1,0x08,0x08,0x2e]
+
+0xc1,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], -1, v[8:9]       ; encoding: [0xc1,0x10,0x08,0x2e]
+
+0xf7,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[4:5]     ; encoding: [0xf7,0x08,0x08,0x2e]
+
+0xf7,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[8:9]     ; encoding: [0xf7,0x10,0x08,0x2e]
+
+0x80,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], 0, v[4:5]        ; encoding: [0x80,0x08,0x08,0x2e]
+
+0x80,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], 0, v[8:9]        ; encoding: [0x80,0x10,0x08,0x2e]
+
+0xf0,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[4:5]      ; encoding: [0xf0,0x08,0x08,0x2e]
+
+0xf0,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[8:9]      ; encoding: [0xf0,0x10,0x08,0x2e]
+
+0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f
+# GFX1250: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f]
+
+0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf
+# GFX1250: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf]
+
+0x7e,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], exec, v[4:5]     ; encoding: [0x7e,0x08,0x08,0x2e]
+
+0x7e,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], exec, v[8:9]     ; encoding: [0x7e,0x10,0x08,0x2e]
+
+0xfe,0x09,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x2e]
+
+0xfe,0x11,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[8:9] ; encoding: [0xfe,0x11,0x08,0x2e]
+
+0x02,0xfd,0x09,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], v[2:3],...
[truncated]

llvmbot · 2025-07-14T21:10:37Z

@llvm/pr-subscribers-mc

Author: Stanislav Mekhanoshin (rampitec)

Changes

Patch is 23.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148725.diff

5 Files Affected:

(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+1)
(modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+14-5)
(modified) llvm/test/CodeGen/AMDGPU/fma.f64.ll (+23-22)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s (+152)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt (+111)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index a32078cc403e7..9df2bdededa13 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -639,6 +639,7 @@ bool isMAC(unsigned Opc) {
          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
          Opc == AMDGPU::V_MAC_F16_e64_vi ||
          Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
+         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 25c6cbc3e1ab5..030a6e1e978c1 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -175,10 +175,14 @@ multiclass VOP2Inst_e64<string opName,
     def _e64 : VOP3InstBase <opName, P, node, 1>,
                Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
 
-    let SubtargetPredicate = isGFX11Plus in {
-      if P.HasExtVOP3DPP then
-        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
-    } // End SubtargetPredicate = isGFX11Plus
+    if P.HasExtVOP3DPP then
+      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+        let SubtargetPredicate = isGFX11Plus;
+      }
+    else if P.HasExt64BitDPP then
+      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+        let OtherPredicates = [HasDPALU_DPP];
+      }
 }
 
 multiclass VOP2Inst_e64_VOPD<string opName,
@@ -1492,7 +1496,9 @@ class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
     VOP2_DPP<op, ps, opName, p, 1> {
   let AssemblerPredicate = HasDPP16;
   let SubtargetPredicate = ps.SubtargetPredicate;
-  let OtherPredicates = ps.OtherPredicates;
+  let OtherPredicates = !listconcat(ps.OtherPredicates,
+      !if(p.HasExt64BitDPP, [HasDPALU_DPP], []),
+      !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []));
 }
 
 class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
@@ -1832,6 +1838,9 @@ let SubtargetPredicate = isGFX12Plus in {
     V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">;
 } // End SubtargetPredicate = isGFX12Plus
 
+let SubtargetPredicate = HasFmacF64Inst in
+defm V_FMAC_F64 : VOP2_Real_FULL<GFX12Gen, 0x17>;
+
 defm V_FMAMK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x23>;
 defm V_FMAAK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x24>;
 
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f64.ll b/llvm/test/CodeGen/AMDGPU/fma.f64.ll
index 99f7d4da685d6..e4488258dcf88 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f64.ll
@@ -1,7 +1,8 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX90A %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMAC_F64 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,FMA_F64 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=FUNC,GCN,FMAC_F64 %s
 
 declare double @llvm.fma.f64(double, double, double) nounwind readnone
 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
@@ -9,8 +10,8 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) n
 declare double @llvm.fabs.f64(double) nounwind readnone
 
 ; FUNC-LABEL: {{^}}fma_f64:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                      ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
    %r0 = load double, ptr addrspace(1) %in1
@@ -22,10 +23,10 @@ define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
 }
 
 ; FUNC-LABEL: {{^}}fma_v2f64:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                        ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
    %r0 = load <2 x double>, ptr addrspace(1) %in1
@@ -37,14 +38,14 @@ define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in
 }
 
 ; FUNC-LABEL: {{^}}fma_v4f64:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                        ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
    %r0 = load <4 x double>, ptr addrspace(1) %in1
@@ -176,8 +177,8 @@ define amdgpu_kernel void @fma_f64_abs_neg_src2(ptr addrspace(1) %out, ptr addrs
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_lit_src0:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out,
                      ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
    %r1 = load double, ptr addrspace(1) %in2
@@ -188,8 +189,8 @@ define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out,
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_lit_src1:
-; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
-; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
+; FMA_F64: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
+; FMAC_F64: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fma_f64_lit_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                      ptr addrspace(1) %in3) {
    %r0 = load double, ptr addrspace(1) %in1
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
index cc14e4caf851e..20bc578605b8c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
@@ -2,6 +2,158 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200-ERR --implicit-check-not=error: %s
 
+v_fmac_f64 v[4:5], v[2:3], v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[4:5]   ; encoding: [0x02,0x09,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[254:255], v[2:3], v[4:5]
+// GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x2f]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[254:255], v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], vcc, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], vcc, v[4:5]      ; encoding: [0x6a,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], exec, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], exec, v[4:5]     ; encoding: [0x7e,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0, v[4:5]        ; encoding: [0x80,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -1, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], -1, v[4:5]       ; encoding: [0xc1,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0.5, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[4:5]      ; encoding: [0xf0,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -4.0, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[4:5]     ; encoding: [0xf7,0x08,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0xaf123456, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0x3f717273, v[4:5]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[254:255]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[8:9]   ; encoding: [0x02,0x11,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[254:255], v[2:3], v[8:9]
+// GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[8:9] ; encoding: [0x02,0x11,0xfc,0x2f]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[254:255], v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[8:9] ; encoding: [0xfe,0x11,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], vcc, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], vcc, v[8:9]      ; encoding: [0x6a,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], exec, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], exec, v[8:9]     ; encoding: [0x7e,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0, v[8:9]        ; encoding: [0x80,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -1, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], -1, v[8:9]       ; encoding: [0xc1,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], 0.5, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[8:9]      ; encoding: [0xf0,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -4.0, v[8:9]
+// GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[8:9]     ; encoding: [0xf7,0x10,0x08,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[254:255]
+// GFX1250: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x2e]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], vcc
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], vcc      ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xd5,0x00,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], exec
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], exec     ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xfd,0x00,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], 0
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], 0        ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x01,0x01,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], -1
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -1       ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x83,0x01,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], 0.5
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], 0.5      ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xe1,0x01,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], -4.0
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -4.0     ; encoding: [0x04,0x00,0x17,0xd5,0x02,0xef,0x01,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -v[2:3], v[8:9]
+// GFX1250: v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9]  ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x20]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], -v[8:9]
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9]  ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x40]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], -v[2:3], -v[8:9]
+// GFX1250: v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x60]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], |v[2:3]|, v[8:9]
+// GFX1250: v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] ; encoding: [0x04,0x01,0x17,0xd5,0x02,0x11,0x02,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], |v[8:9]|
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| ; encoding: [0x04,0x02,0x17,0xd5,0x02,0x11,0x02,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], |v[2:3]|, |v[8:9]|
+// GFX1250: v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| ; encoding: [0x04,0x03,0x17,0xd5,0x02,0x11,0x02,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9] clamp
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp ; encoding: [0x04,0x80,0x17,0xd5,0x02,0x11,0x02,0x00]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9] mul:2
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x08]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9] mul:4
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x10]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_fmac_f64 v[4:5], v[2:3], v[8:9] div:2
+// GFX1250: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x17,0xd5,0x02,0x11,0x02,0x18]
+// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3]
 // GFX1250: v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3] ; encoding: [0x04,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
index f0fcddb06599f..c1213f2d9ec0d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
@@ -1,6 +1,117 @@
 # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
 
+0x02,0x09,0xfc,0x2f
+# GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x2f]
+
+0x02,0x11,0xfc,0x2f
+# GFX1250: v_fmac_f64_e32 v[254:255], v[2:3], v[8:9] ; encoding: [0x02,0x11,0xfc,0x2f]
+
+0xc1,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], -1, v[4:5]       ; encoding: [0xc1,0x08,0x08,0x2e]
+
+0xc1,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], -1, v[8:9]       ; encoding: [0xc1,0x10,0x08,0x2e]
+
+0xf7,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[4:5]     ; encoding: [0xf7,0x08,0x08,0x2e]
+
+0xf7,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], -4.0, v[8:9]     ; encoding: [0xf7,0x10,0x08,0x2e]
+
+0x80,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], 0, v[4:5]        ; encoding: [0x80,0x08,0x08,0x2e]
+
+0x80,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], 0, v[8:9]        ; encoding: [0x80,0x10,0x08,0x2e]
+
+0xf0,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[4:5]      ; encoding: [0xf0,0x08,0x08,0x2e]
+
+0xf0,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], 0.5, v[8:9]      ; encoding: [0xf0,0x10,0x08,0x2e]
+
+0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f
+# GFX1250: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x73,0x72,0x71,0x3f]
+
+0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf
+# GFX1250: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x2e,0x56,0x34,0x12,0xaf]
+
+0x7e,0x08,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], exec, v[4:5]     ; encoding: [0x7e,0x08,0x08,0x2e]
+
+0x7e,0x10,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], exec, v[8:9]     ; encoding: [0x7e,0x10,0x08,0x2e]
+
+0xfe,0x09,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x2e]
+
+0xfe,0x11,0x08,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], v[254:255], v[8:9] ; encoding: [0xfe,0x11,0x08,0x2e]
+
+0x02,0xfd,0x09,0x2e
+# GFX1250: v_fmac_f64_e32 v[4:5], v[2:3],...
[truncated]

[AMDGPU] Add gfx1250 v_fmac_f64 implementation

253eff3

rampitec requested review from changpeng and shiltian July 14, 2025 21:10

rampitec marked this pull request as ready for review July 14, 2025 21:10

llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels Jul 14, 2025

rampitec mentioned this pull request Jul 14, 2025

[AMDGPU] Use 64-bit literals in codegen on gfx1250 #148727

Merged

shiltian approved these changes Jul 14, 2025

View reviewed changes

rampitec mentioned this pull request Jul 14, 2025

[AMDGPU] Codegen support for v_fmaak_f64/f_fmamk_f64 #148734

Merged

rampitec merged commit 5277021 into main Jul 14, 2025
14 checks passed

rampitec deleted the users/rampitec/07-14-_amdgpu_add_gfx1250_v_fmac_f64_implementation branch July 14, 2025 22:39

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Add gfx1250 v_fmac_f64 implementation #148725

[AMDGPU] Add gfx1250 v_fmac_f64 implementation #148725

Uh oh!

rampitec commented Jul 14, 2025

Uh oh!

rampitec commented Jul 14, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jul 14, 2025

Uh oh!

llvmbot commented Jul 14, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[AMDGPU] Add gfx1250 v_fmac_f64 implementation #148725

[AMDGPU] Add gfx1250 v_fmac_f64 implementation #148725

Uh oh!

Conversation

rampitec commented Jul 14, 2025

Uh oh!

rampitec commented Jul 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 14, 2025

Uh oh!

llvmbot commented Jul 14, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

rampitec commented Jul 14, 2025 •

edited

Loading