Skip to content

Commit cbba8f0

Browse files
authored
[AMDGPU] Codegen support for v_fmaak_f64/v_fmamk_f64 (#148734)
1 parent 2c67718 commit cbba8f0

File tree

6 files changed: 271 additions (+271) and 28 deletions (-28).

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3513,6 +3513,10 @@ static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc) {
35133513
? AMDGPU::V_FMAAK_F16_t16
35143514
: AMDGPU::V_FMAAK_F16_fake16
35153515
: AMDGPU::V_FMAAK_F16;
3516+
case AMDGPU::V_FMAC_F64_e32:
3517+
case AMDGPU::V_FMAC_F64_e64:
3518+
case AMDGPU::V_FMA_F64_e64:
3519+
return AMDGPU::V_FMAAK_F64;
35163520
default:
35173521
llvm_unreachable("invalid instruction");
35183522
}
@@ -3541,6 +3545,10 @@ static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) {
35413545
? AMDGPU::V_FMAMK_F16_t16
35423546
: AMDGPU::V_FMAMK_F16_fake16
35433547
: AMDGPU::V_FMAMK_F16;
3548+
case AMDGPU::V_FMAC_F64_e32:
3549+
case AMDGPU::V_FMAC_F64_e64:
3550+
case AMDGPU::V_FMA_F64_e64:
3551+
return AMDGPU::V_FMAMK_F64;
35443552
default:
35453553
llvm_unreachable("invalid instruction");
35463554
}
@@ -3619,7 +3627,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36193627
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
36203628
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
36213629
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3622-
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
3630+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
3631+
Opc == AMDGPU::V_FMAC_F64_e64) {
36233632
// Don't fold if we are using source or output modifiers. The new VOP2
36243633
// instructions don't have them.
36253634
if (hasAnyModifiersSet(UseMI))
@@ -3691,7 +3700,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36913700

36923701
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
36933702
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3694-
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
3703+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3704+
Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
36953705
UseMI.untieRegOperand(
36963706
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
36973707

@@ -3759,7 +3769,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
37593769

37603770
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
37613771
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3762-
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
3772+
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3773+
Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
37633774
UseMI.untieRegOperand(
37643775
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
37653776

@@ -4080,8 +4091,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40804091
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
40814092
const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel);
40824093

4083-
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
4084-
!IsLegacy &&
4094+
if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4095+
(!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
40854096
// If we have an SGPR input, we will violate the constant bus restriction.
40864097
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
40874098
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
463463
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
464464
NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
465465
break;
466+
case AMDGPU::V_FMA_F64_e64:
467+
if (ST->hasFmaakFmamkF64Insts())
468+
NewOpcode = AMDGPU::V_FMAAK_F64;
469+
break;
466470
}
467471
}
468472

@@ -497,6 +501,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
497501
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
498502
NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
499503
break;
504+
case AMDGPU::V_FMA_F64_e64:
505+
if (ST->hasFmaakFmamkF64Insts())
506+
NewOpcode = AMDGPU::V_FMAMK_F64;
507+
break;
500508
}
501509
}
502510

@@ -961,7 +969,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
961969
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
962970
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
963971
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
964-
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
972+
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
973+
(MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
974+
ST->hasFmaakFmamkF64Insts())) {
965975
shrinkMadFma(MI);
966976
continue;
967977
}

llvm/test/CodeGen/AMDGPU/literal64.ll

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -256,17 +256,28 @@ define amdgpu_ps <2 x float> @v_lshl_add_u64(i64 %a) {
256256
; No folding into VOP2 promoted to VOP3
257257

258258
define amdgpu_ps <2 x float> @v_fma_f64(double %a, double %b) {
259-
; GCN-LABEL: v_fma_f64:
260-
; GCN: ; %bb.0:
261-
; GCN-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
262-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
263-
; GCN-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
264-
; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
265-
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
266-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
267-
; GCN-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
268-
; GCN-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
269-
; GCN-NEXT: ; return to shader part epilog
259+
; GCN-SDAG-LABEL: v_fma_f64:
260+
; GCN-SDAG: ; %bb.0:
261+
; GCN-SDAG-NEXT: v_fmaak_f64 v[4:5], v[0:1], v[2:3], lit64(0x4063233333333333)
262+
; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
263+
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
264+
; GCN-SDAG-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
265+
; GCN-SDAG-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
266+
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
267+
; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
268+
; GCN-SDAG-NEXT: ; return to shader part epilog
269+
;
270+
; GCN-GISEL-LABEL: v_fma_f64:
271+
; GCN-GISEL: ; %bb.0:
272+
; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
273+
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
274+
; GCN-GISEL-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
275+
; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
276+
; GCN-GISEL-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
277+
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
278+
; GCN-GISEL-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
279+
; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
280+
; GCN-GISEL-NEXT: ; return to shader part epilog
270281
%r1 = call double @llvm.fma.f64(double %a, double %b, double 153.1) nounwind readnone
271282
%r2 = call double @llvm.fma.f64(double %a, double %r1, double 200.1) nounwind readnone
272283
%r3 = call double @llvm.fma.f64(double %r2, double %r1, double 200.1) nounwind readnone

llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
2+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX942 %s
3+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1250 -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX1250 %s
34

45
---
56
name: fold_simm_virtual
@@ -564,6 +565,144 @@ body: |
564565
565566
...
566567

568+
---
569+
name: fmac_sreg_64_src0_to_fmamk_f64
570+
tracksRegLiveness: true
571+
body: |
572+
bb.0:
573+
574+
; GFX942-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
575+
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
576+
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
577+
; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
578+
; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
579+
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
580+
;
581+
; GFX1250-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
582+
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
583+
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
584+
; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
585+
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
586+
%0:vreg_64_align2 = IMPLICIT_DEF
587+
%1:vreg_64_align2 = IMPLICIT_DEF
588+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
589+
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
590+
SI_RETURN_TO_EPILOG %3
591+
...
592+
593+
---
594+
name: fmac_sreg_64_src1_to_fmamk_f64
595+
tracksRegLiveness: true
596+
body: |
597+
bb.0:
598+
599+
; GCN-LABEL: name: fmac_sreg_64_src1_to_fmamk_f64
600+
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
601+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
602+
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
603+
; GCN-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
604+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
605+
%0:vreg_64_align2 = IMPLICIT_DEF
606+
%1:vreg_64_align2 = IMPLICIT_DEF
607+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
608+
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
609+
SI_RETURN_TO_EPILOG %3
610+
...
611+
612+
---
613+
name: fmac_vreg_64_to_fmaak_f64
614+
tracksRegLiveness: true
615+
body: |
616+
bb.0:
617+
618+
; GFX942-LABEL: name: fmac_vreg_64_to_fmaak_f64
619+
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
620+
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
621+
; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
622+
; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
623+
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
624+
;
625+
; GFX1250-LABEL: name: fmac_vreg_64_to_fmaak_f64
626+
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
627+
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
628+
; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
629+
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
630+
%0:vreg_64_align2 = IMPLICIT_DEF
631+
%1:vreg_64_align2 = IMPLICIT_DEF
632+
%2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
633+
%3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
634+
SI_RETURN_TO_EPILOG %3
635+
...
636+
637+
---
638+
name: fma_sreg_64_src0_to_fmamk_f64
639+
tracksRegLiveness: true
640+
body: |
641+
bb.0:
642+
643+
; GFX942-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
644+
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
645+
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
646+
; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
647+
; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
648+
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
649+
;
650+
; GFX1250-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
651+
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
652+
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
653+
; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
654+
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
655+
%0:vreg_64_align2 = IMPLICIT_DEF
656+
%1:vreg_64_align2 = IMPLICIT_DEF
657+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
658+
%3:vreg_64_align2 = V_FMA_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
659+
SI_RETURN_TO_EPILOG %3
660+
...
661+
662+
---
663+
name: fma_sreg_64_src1_to_fmamk_f64
664+
tracksRegLiveness: true
665+
body: |
666+
bb.0:
667+
668+
; GCN-LABEL: name: fma_sreg_64_src1_to_fmamk_f64
669+
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
670+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
671+
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
672+
; GCN-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
673+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
674+
%0:vreg_64_align2 = IMPLICIT_DEF
675+
%1:vreg_64_align2 = IMPLICIT_DEF
676+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
677+
%3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
678+
SI_RETURN_TO_EPILOG %3
679+
...
680+
681+
---
682+
name: fma_vreg_64_to_fmaak_f64
683+
tracksRegLiveness: true
684+
body: |
685+
bb.0:
686+
687+
; GFX942-LABEL: name: fma_vreg_64_to_fmaak_f64
688+
; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
689+
; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
690+
; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
691+
; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
692+
; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
693+
;
694+
; GFX1250-LABEL: name: fma_vreg_64_to_fmaak_f64
695+
; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
696+
; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
697+
; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
698+
; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
699+
%0:vreg_64_align2 = IMPLICIT_DEF
700+
%1:vreg_64_align2 = IMPLICIT_DEF
701+
%2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
702+
%3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
703+
SI_RETURN_TO_EPILOG %3
704+
...
705+
567706
---
568707
name: fold_v_mov_b32_e32_literal_to_agpr
569708
body: |
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass si-shrink-instructions %s -o - | FileCheck %s -check-prefix=GFX1250
3+
4+
---
5+
name: fma_cvv_f64
6+
body: |
7+
bb.0:
8+
; GFX1250-LABEL: name: fma_cvv_f64
9+
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
10+
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
11+
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
12+
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
13+
$vgpr0_vgpr1 = IMPLICIT_DEF
14+
$vgpr2_vgpr3 = IMPLICIT_DEF
15+
$vgpr4_vgpr5 = V_FMA_F64_e64 0, 4638355772470722560, 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
16+
SI_RETURN implicit $vgpr4_vgpr5
17+
...
18+
19+
---
20+
name: fma_vcv_f64
21+
body: |
22+
bb.0:
23+
; GFX1250-LABEL: name: fma_vcv_f64
24+
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
25+
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
26+
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
27+
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
28+
$vgpr0_vgpr1 = IMPLICIT_DEF
29+
$vgpr2_vgpr3 = IMPLICIT_DEF
30+
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, 4638355772470722560, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
31+
SI_RETURN implicit $vgpr4_vgpr5
32+
...
33+
34+
---
35+
name: fma_vvc_f64
36+
body: |
37+
bb.0:
38+
; GFX1250-LABEL: name: fma_vvc_f64
39+
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
40+
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
41+
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
42+
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
43+
$vgpr0_vgpr1 = IMPLICIT_DEF
44+
$vgpr2_vgpr3 = IMPLICIT_DEF
45+
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
46+
SI_RETURN implicit $vgpr4_vgpr5
47+
...
48+
49+
---
50+
name: fma_vsc_f64
51+
body: |
52+
bb.0:
53+
; GFX1250-LABEL: name: fma_vsc_f64
54+
; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
55+
; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
56+
; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
57+
; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
58+
$vgpr0_vgpr1 = IMPLICIT_DEF
59+
$vgpr2_vgpr3 = IMPLICIT_DEF
60+
$vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
61+
SI_RETURN implicit $vgpr4_vgpr5
62+
...

0 commit comments

Comments
 (0)