Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3513,6 +3513,10 @@ static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc) {
? AMDGPU::V_FMAAK_F16_t16
: AMDGPU::V_FMAAK_F16_fake16
: AMDGPU::V_FMAAK_F16;
case AMDGPU::V_FMAC_F64_e32:
case AMDGPU::V_FMAC_F64_e64:
case AMDGPU::V_FMA_F64_e64:
return AMDGPU::V_FMAAK_F64;
default:
llvm_unreachable("invalid instruction");
}
Expand Down Expand Up @@ -3541,6 +3545,10 @@ static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) {
? AMDGPU::V_FMAMK_F16_t16
: AMDGPU::V_FMAMK_F16_fake16
: AMDGPU::V_FMAMK_F16;
case AMDGPU::V_FMAC_F64_e32:
case AMDGPU::V_FMAC_F64_e64:
case AMDGPU::V_FMA_F64_e64:
return AMDGPU::V_FMAMK_F64;
default:
llvm_unreachable("invalid instruction");
}
Expand Down Expand Up @@ -3619,7 +3627,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64) {
// Don't fold if we are using source or output modifiers. The new VOP2
// instructions don't have them.
if (hasAnyModifiersSet(UseMI))
Expand Down Expand Up @@ -3691,7 +3700,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,

if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

Expand Down Expand Up @@ -3759,7 +3769,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,

if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

Expand Down Expand Up @@ -4080,8 +4091,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel);

if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
!IsLegacy &&
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
(!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
break;
case AMDGPU::V_FMA_F64_e64:
if (ST->hasFmaakFmamkF64Insts())
NewOpcode = AMDGPU::V_FMAAK_F64;
break;
}
}

Expand Down Expand Up @@ -497,6 +501,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
break;
case AMDGPU::V_FMA_F64_e64:
if (ST->hasFmaakFmamkF64Insts())
NewOpcode = AMDGPU::V_FMAMK_F64;
break;
}
}

Expand Down Expand Up @@ -961,7 +969,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
(MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
ST->hasFmaakFmamkF64Insts())) {
shrinkMadFma(MI);
continue;
}
Expand Down
33 changes: 22 additions & 11 deletions llvm/test/CodeGen/AMDGPU/literal64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -256,17 +256,28 @@ define amdgpu_ps <2 x float> @v_lshl_add_u64(i64 %a) {
; No folding into VOP2 promoted to VOP3

define amdgpu_ps <2 x float> @v_fma_f64(double %a, double %b) {
; GCN-LABEL: v_fma_f64:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GCN-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
; GCN-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GCN-NEXT: ; return to shader part epilog
; GCN-SDAG-LABEL: v_fma_f64:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: v_fmaak_f64 v[4:5], v[0:1], v[2:3], lit64(0x4063233333333333)
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-SDAG-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
; GCN-SDAG-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GCN-SDAG-NEXT: ; return to shader part epilog
;
; GCN-GISEL-LABEL: v_fma_f64:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GCN-GISEL-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
; GCN-GISEL-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GCN-GISEL-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GCN-GISEL-NEXT: ; return to shader part epilog
%r1 = call double @llvm.fma.f64(double %a, double %b, double 153.1) nounwind readnone
%r2 = call double @llvm.fma.f64(double %a, double %r1, double 200.1) nounwind readnone
%r3 = call double @llvm.fma.f64(double %r2, double %r1, double 200.1) nounwind readnone
Expand Down
141 changes: 140 additions & 1 deletion llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX942 %s
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1250 -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX1250 %s

---
name: fold_simm_virtual
Expand Down Expand Up @@ -564,6 +565,144 @@ body: |
...

---
# A 64-bit literal materialized by S_MOV_B64_IMM_PSEUDO feeds src0 of a
# V_FMAC_F64_e64. Per the checks below, gfx1250 rewrites the use to
# V_FMAMK_F64 with the literal as the K operand and the S_MOV no longer
# appears, while gfx942 keeps the S_MOV and the V_FMAC_F64_e64 unchanged.
name: fmac_sreg_64_src0_to_fmamk_f64
tracksRegLiveness: true
body: |
bb.0:
; GFX942-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
;
; GFX1250-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...

---
# NOTE(review): despite the name, the literal %2 (S_MOV_B64_IMM_PSEUDO) is
# never used by the V_FMAC_F64_e64 below: src1 and src2 are both %1. No
# immediate fold can therefore be exercised, which is why gfx942 and gfx1250
# share the common GCN checks and the instruction is left unchanged.
# Presumably src1 was meant to be %2; confirm, fix the operand and regenerate
# the checks with utils/update_mir_test_checks.py.
name: fmac_sreg_64_src1_to_fmamk_f64
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: fmac_sreg_64_src1_to_fmamk_f64
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
; GCN-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...

---
# A 64-bit literal materialized by V_MOV_B64_PSEUDO feeds src2 (the addend)
# of a V_FMAC_F64_e64. Per the checks below, gfx1250 rewrites the use to
# V_FMAAK_F64 with the literal as the trailing K operand and the V_MOV no
# longer appears, while gfx942 keeps both original instructions.
name: fmac_vreg_64_to_fmaak_f64
tracksRegLiveness: true
body: |
bb.0:
; GFX942-LABEL: name: fmac_vreg_64_to_fmaak_f64
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
;
; GFX1250-LABEL: name: fmac_vreg_64_to_fmaak_f64
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...

---
# Same scenario as the FMAC src0 case, but starting from V_FMA_F64_e64: the
# S_MOV_B64_IMM_PSEUDO literal is src0. Per the checks below, gfx1250 produces
# V_FMAMK_F64 with the literal as K and the S_MOV no longer appears, while
# gfx942 leaves the V_FMA_F64_e64 and its S_MOV input unchanged.
name: fma_sreg_64_src0_to_fmamk_f64
tracksRegLiveness: true
body: |
bb.0:
; GFX942-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
;
; GFX1250-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
%3:vreg_64_align2 = V_FMA_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...

---
# NOTE(review): despite the name, the literal %2 (S_MOV_B64_IMM_PSEUDO) is
# never used by the V_FMA_F64_e64 below: src1 and src2 are both %1. No
# immediate fold can therefore be exercised, which is why gfx942 and gfx1250
# share the common GCN checks and the instruction is left unchanged.
# Presumably src1 was meant to be %2; confirm, fix the operand and regenerate
# the checks with utils/update_mir_test_checks.py.
name: fma_sreg_64_src1_to_fmamk_f64
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: fma_sreg_64_src1_to_fmamk_f64
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
; GCN-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
%3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...

---
# Same scenario as the FMAC addend case, but starting from V_FMA_F64_e64: the
# V_MOV_B64_PSEUDO literal is src2. Per the checks below, gfx1250 produces
# V_FMAAK_F64 with the literal as the trailing K operand and the V_MOV no
# longer appears, while gfx942 keeps both original instructions.
name: fma_vreg_64_to_fmaak_f64
tracksRegLiveness: true
body: |
bb.0:
; GFX942-LABEL: name: fma_vreg_64_to_fmaak_f64
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
;
; GFX1250-LABEL: name: fma_vreg_64_to_fmaak_f64
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
%3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
SI_RETURN_TO_EPILOG %3
...

---
name: fold_v_mov_b32_e32_literal_to_agpr
body: |
Expand Down
62 changes: 62 additions & 0 deletions llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass si-shrink-instructions %s -o - | FileCheck %s -check-prefix=GFX1250

---
# Operand pattern "cvv": the 64-bit literal is src0 and src1/src2 are VGPR
# pairs. The GFX1250 checks expect the V_FMA_F64_e64 to be shrunk to
# V_FMAMK_F64, with the remaining multiplicand ($vgpr0_vgpr1) first and the
# literal as the K operand.
name: fma_cvv_f64
body: |
bb.0:
; GFX1250-LABEL: name: fma_cvv_f64
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr2_vgpr3 = IMPLICIT_DEF
$vgpr4_vgpr5 = V_FMA_F64_e64 0, 4638355772470722560, 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit $vgpr4_vgpr5
...

---
# Operand pattern "vcv": the 64-bit literal is src1, with VGPR pairs in src0
# and src2. The GFX1250 checks expect the V_FMA_F64_e64 to be shrunk to
# V_FMAMK_F64 with the literal as the K operand.
name: fma_vcv_f64
body: |
bb.0:
; GFX1250-LABEL: name: fma_vcv_f64
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr2_vgpr3 = IMPLICIT_DEF
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, 4638355772470722560, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit $vgpr4_vgpr5
...

---
# Operand pattern "vvc": src0 and src1 are VGPR pairs and the 64-bit literal
# is the addend (src2). The GFX1250 checks expect the V_FMA_F64_e64 to be
# shrunk to V_FMAAK_F64 with the literal as the trailing K operand.
name: fma_vvc_f64
body: |
bb.0:
; GFX1250-LABEL: name: fma_vvc_f64
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr2_vgpr3 = IMPLICIT_DEF
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit $vgpr4_vgpr5
...

---
# NOTE(review): the input below is identical to fma_vvc_f64: both register
# sources are VGPR pairs and no SGPR operand appears, so this case adds no
# coverage beyond the vvc test. Presumably the "s" in the name means src1 was
# intended to be an SGPR pair; confirm, change the operand and regenerate the
# checks with utils/update_mir_test_checks.py.
name: fma_vsc_f64
body: |
bb.0:
; GFX1250-LABEL: name: fma_vsc_f64
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr2_vgpr3 = IMPLICIT_DEF
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit $vgpr4_vgpr5
...
Loading
Loading