diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 72f2ba75c927e..7b18a98d7f3ca 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3221,6 +3221,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FMAXNUM_IEEE: case TargetOpcode::G_FMINIMUM: case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FMINIMUMNUM: + case TargetOpcode::G_FMAXIMUMNUM: case TargetOpcode::G_FDIV: case TargetOpcode::G_FREM: case TargetOpcode::G_FCEIL: @@ -4591,6 +4593,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerFCopySign(MI); case G_FMINNUM: case G_FMAXNUM: + case G_FMINIMUMNUM: + case G_FMAXIMUMNUM: return lowerFMinNumMaxNum(MI); case G_MERGE_VALUES: return lowerMergeValues(MI); @@ -5379,6 +5383,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FMAXNUM_IEEE: case G_FMINIMUM: case G_FMAXIMUM: + case G_FMINIMUMNUM: + case G_FMAXIMUMNUM: case G_FSHL: case G_FSHR: case G_ROTL: @@ -6090,6 +6096,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_FMAXNUM_IEEE: case TargetOpcode::G_FMINIMUM: case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FMINIMUMNUM: + case TargetOpcode::G_FMAXIMUMNUM: case TargetOpcode::G_STRICT_FADD: case TargetOpcode::G_STRICT_FSUB: case TargetOpcode::G_STRICT_FMUL: @@ -8139,8 +8147,27 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { - unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ? - TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE; + // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have + // identical handling. fminimumnum/fmaximumnum also need a path that does not + // depend on fminnum/fmaxnum. 
+ + unsigned NewOp; + switch (MI.getOpcode()) { + case TargetOpcode::G_FMINNUM: + NewOp = TargetOpcode::G_FMINNUM_IEEE; + break; + case TargetOpcode::G_FMINIMUMNUM: + NewOp = TargetOpcode::G_FMINNUM; + break; + case TargetOpcode::G_FMAXNUM: + NewOp = TargetOpcode::G_FMAXNUM_IEEE; + break; + case TargetOpcode::G_FMAXIMUMNUM: + NewOp = TargetOpcode::G_FMAXNUM; + break; + default: + llvm_unreachable("unexpected min/max opcode"); + } auto [Dst, Src0, Src1] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Dst); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 7bb461e0a239f..667c466a998e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -960,6 +960,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, auto &MinNumMaxNum = getActionDefinitionsBuilder({ G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE}); + // TODO: These should be custom lowered and are directly legal with IEEE=0 + auto &MinimumNumMaximumNum = + getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM}); + if (ST.hasVOP3PInsts()) { MinNumMaxNum.customFor(FPTypesPK16) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) @@ -976,6 +980,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); } + MinimumNumMaximumNum.lower(); + if (ST.hasVOP3PInsts()) FPOpActions.clampMaxNumElementsStrict(0, S16, 2); diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll index e299f959edb08..c45d86ce306e7 100644 --- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll +++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll @@ -1,106 +1,209 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s -; RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa 
-mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-SDAG,GFX11-TRUE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-GISEL,GFX11-TRUE16-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-SDAG,GFX12-FAKE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-GISEL,GFX12-FAKE16-GISEL %s define half @v_maximumnum_f16(half %x, half %y) { -; GFX7-LABEL: v_maximumnum_f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: 
v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f16: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_maximumnum_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16: -; GFX12-FAKE16: ; 
%bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f16: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, 
v0 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16: +; GFX12-FAKE16-SDAG: ; %bb.0: 
+; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.maximumnum.f16(half %x, half %y) ret half %result } define half @v_maximumnum_f16_nnan(half %x, half %y) { -; GFX7-LABEL: v_maximumnum_f16_nnan: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_nnan: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: 
v_maximumnum_f16_nnan: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximumnum_f16_nnan: ; GFX8: ; %bb.0: @@ -156,13 +259,22 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) { } define half @v_maximumnum_f16_1.0(half %x) { -; GFX7-LABEL: v_maximumnum_f16_1.0: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_1.0: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_1.0: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximumnum_f16_1.0: ; GFX8: ; %bb.0: @@ -229,57 +341,109 @@ define half @v_maximumnum_f16_1.0(half %x) { } define float @v_maximumnum_f32(float %x, float %y) { -; GFX7-LABEL: v_maximumnum_f32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v1, 
1.0, v1 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-SDAG-LABEL: v_maximumnum_f32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, 
v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.maximumnum.f32(float %x, float %y) ret float %result } @@ -329,59 +493,113 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) { } define double @v_maximumnum_f64(double %x, double %y) { -; GFX7-LABEL: v_maximumnum_f64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; 
GFX8-LABEL: v_maximumnum_f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f64: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX7-GISEL-LABEL: v_maximumnum_f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f64: +; GFX10-GISEL: ; 
%bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f64: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f64: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.maximumnum.f64(double %x, double %y) ret double %result } @@ -707,505 +925,978 @@ define double @v_maximumnum_f64_1.0(double %x) { } define half @v_maximumnum_f16_s_v(half inreg %x, half %y) { -; GFX7-LABEL: v_maximumnum_f16_s_v: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16_s_v: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX8-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f16_s_v: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX900-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX900-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f16_s_v: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX950-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX950-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16_s_v: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX10-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16_s_v: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; 
GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_maximumnum_f16_s_v: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16_s_v: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16_s_v: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v1, v0 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; 
GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f16_s_v: +; 
GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0 +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s0, s0 +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 
v1, s0, s0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_v: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 
+; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v1, v0 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_v: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v1, v0 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.maximumnum.f16(half %x, half %y) ret half %result } define half @v_maximumnum_f16_v_s(half %x, half inreg %y) { -; GFX7-LABEL: v_maximumnum_f16_v_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16_v_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f16_v_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX900-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX900-NEXT: v_max_f16_e32 v0, 
v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f16_v_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX950-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX950-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16_v_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16_v_s: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0 -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_maximumnum_f16_v_s: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16_v_s: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16_v_s: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: 
s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s16 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_max_f16_e32 
v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0 +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 
+; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s0, s0 +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_v_s: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_v_s: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.maximumnum.f16(half %x, half %y) ret half %result } define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) { -; GFX7-LABEL: v_maximumnum_f16_s_s: -; 
GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16_s_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v0, s17, s17 -; GFX8-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX8-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f16_s_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f16_e64 v0, s17, s17 -; GFX900-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX900-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f16_s_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f16_e64 v0, s1, s1 -; GFX950-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX950-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16_s_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v0, s17, s17 -; GFX10-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX10-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16_s_s: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, s1, s1 -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_maximumnum_f16_s_s: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, s1, s1 -; GFX11-FAKE16-NEXT: 
v_max_f16_e64 v1, s0, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v1, v0 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16_s_s: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, s1, s1 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16_s_s: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, s1, s1 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v1, v0 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, s16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, s16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s17 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v0, s1, s1 +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e64 v0, s0, s0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s1, s1 +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, s1, s1 +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0 +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.h, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, s0, s0 +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s1, s1 +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, s1, s1 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, s0, s0 +; 
GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s1, s1 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1 +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_s: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v1, v0 +; 
GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_s: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.maximumnum.f16(half %x, half %y) ret half %result } define float @v_maximumnum_f32_s_v(float inreg %x, float %y) { -; GFX7-LABEL: v_maximumnum_f32_s_v: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX7-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32_s_v: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX8-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f32_s_v: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX900-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX900-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f32_s_v: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX950-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX950-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32_s_v: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX10-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32_s_v: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32_s_v: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 -; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v1, v0 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32_s_v: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f32_s_v: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f32_s_v: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32_s_v: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 
1.0, s16 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f32_s_v: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f32_s_v: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f32_s_v: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f32_s_v: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32_s_v: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32_s_v: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v1, v0 +; 
GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32_s_v: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32_s_v: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f32_s_v: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v1, v0 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f32_s_v: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v1, v0 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.maximumnum.f32(float %x, float %y) ret float %result } define float @v_maximumnum_f32_v_s(float %x, float inreg %y) { -; GFX7-LABEL: v_maximumnum_f32_v_s: -; GFX7: ; %bb.0: -; 
GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32_v_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f32_v_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f32_v_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX950-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32_v_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32_v_s: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32_v_s: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; 
GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32_v_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f32_v_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f32_v_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32_v_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f32_v_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f32_v_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f32_v_s: +; GFX950-SDAG: ; 
%bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f32_v_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32_v_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32_v_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32_v_s: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32_v_s: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f32_v_s: +; GFX12-SDAG: ; %bb.0: +; 
GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f32_v_s: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.maximumnum.f32(float %x, float %y) ret float %result } define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) { -; GFX7-LABEL: v_maximumnum_f32_s_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s17 -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX7-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32_s_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, s17 -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX8-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f32_s_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f32_e64 v0, s17, s17 -; GFX900-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX900-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f32_s_s: -; GFX950: ; %bb.0: -; 
GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e64 v0, s1, s1 -; GFX950-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX950-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32_s_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v0, s17, s17 -; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX10-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32_s_s: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v0, s1, s1 -; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v1, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32_s_s: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v0, s1, s1 -; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v1, v0 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32_s_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17 +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f32_s_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16 +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f32_s_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17 +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32_s_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16 +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f32_s_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v0, s17, s17 +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f32_s_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e64 v0, s16, s16 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s17, s17 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f32_s_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v0, s1, s1 +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f32_s_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e64 v0, s0, s0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s1, s1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32_s_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v0, s17, s17 +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; 
GFX10-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32_s_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e64 v0, s16, s16 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s17, s17 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32_s_s: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v0, s1, s1 +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32_s_s: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, s0, s0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s1, s1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f32_s_s: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v1, v0 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f32_s_s: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 
v1, s1, s1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.maximumnum.f32(float %x, float %y) ret float %result } define double @v_maximumnum_f64_s_v(double inreg %x, double %y) { -; GFX7-LABEL: v_maximumnum_f64_s_v: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX7-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f64_s_v: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f64_s_v: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX900-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f64_s_v: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX950-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX950-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f64_s_v: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f64_s_v: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], 
s[16:17] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f64_s_v: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f64_s_v: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f64_s_v: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f64_s_v: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f64_s_v: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f64_s_v: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f64_s_v: ; GFX10: ; %bb.0: @@ -1241,37 +1932,69 @@ define double @v_maximumnum_f64_s_v(double inreg %x, double %y) { } define double @v_maximumnum_f64_v_s(double %x, double inreg %y) { -; GFX7-LABEL: v_maximumnum_f64_v_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f64_v_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f64_v_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f64_v_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX950-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f64_v_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f64_v_s: +; GFX7-GISEL: ; 
%bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f64_v_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f64_v_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f64_v_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f64_v_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f64_v_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: 
v_maximumnum_f64_v_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f64_v_s: ; GFX10: ; %bb.0: @@ -1307,184 +2030,354 @@ define double @v_maximumnum_f64_v_s(double %x, double inreg %y) { } define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) { -; GFX7-LABEL: v_maximumnum_f64_s_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] -; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX7-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f64_s_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] -; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX8-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_f64_s_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] -; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX900-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_f64_s_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] -; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX950-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f64_s_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] -; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX10-NEXT: v_max_f64 v[0:1], 
v[2:3], v[0:1] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f64_s_s: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] -; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f64_s_s: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3] -; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[0:1] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f64_s_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f64_s_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f64_s_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f64_s_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_f64_s_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f64_s_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f64_s_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f64_s_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f64_s_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f64_s_s: +; GFX10-GISEL: ; 
%bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f64_s_s: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f64_s_s: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f64_s_s: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[0:1] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f64_s_s: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], s[0:1], s[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], s[2:3], s[2:3] +; 
GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.maximumnum.f64(double %x, double %y) ret double %result } define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) { -; GFX7-LABEL: v_maximumnum_f32_fabs_rhs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32_fabs_rhs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f32_fabs_rhs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32_fabs_rhs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32_fabs_rhs: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32_fabs_rhs: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 
v1, |v1|, |v1| -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX9-GISEL-NEXT: v_max_f32_e32 
v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: 
v_maximumnum_f32_fabs_rhs: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) %result = call float @llvm.maximumnum.f32(float %x, float %fabs.y) ret float %result } define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) { -; GFX7-LABEL: v_maximumnum_f32_fneg_fabs_rhs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v1, -1.0, |v1| -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32_fneg_fabs_rhs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v1, -1.0, |v1| -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f32_fneg_fabs_rhs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32_fneg_fabs_rhs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32_fneg_fabs_rhs: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| 
-; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32_fneg_fabs_rhs: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1| +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1| +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1| +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1| +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: 
v_maximumnum_f32_fneg_fabs_rhs: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) %fneg.fabs.y = fneg float %fabs.y %result = call float @llvm.maximumnum.f32(float %x, float %fneg.fabs.y) @@ -1492,59 +2385,113 @@ define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) { } define float @v_maximumnum_f32_fabs(float %x, float %y) { -; GFX7-LABEL: v_maximumnum_f32_fabs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0| -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32_fabs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, |v0| -; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f32_fabs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32_fabs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX10-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32_fabs: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX11-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32_fabs: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| -; GFX12-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0| +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0| +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 
v1, 1.0, |v1| +; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0| +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0| +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX10-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX11-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 
+; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) %fabs.y = call float @llvm.fabs.f32(float %y) %result = call float @llvm.maximumnum.f32(float %fabs.x, float %fabs.y) @@ -1552,59 +2499,113 @@ define float @v_maximumnum_f32_fabs(float %x, float %y) { } define float @v_maximumnum_f32_fneg(float %x, float %y) { -; GFX7-LABEL: v_maximumnum_f32_fneg: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v1, -1.0, v1 -; GFX7-NEXT: v_mul_f32_e32 v0, 
-1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32_fneg: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v1, -1.0, v1 -; GFX8-NEXT: v_mul_f32_e32 v0, -1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f32_fneg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX9-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32_fneg: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX10-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32_fneg: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX11-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32_fneg: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, -v1, -v1 -; GFX12-NEXT: v_max_num_f32_e64 v0, -v0, -v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: 
v_maximumnum_f32_fneg: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32_fneg: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX10-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32_fneg: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e64 
v0, -v0, -v0 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX11-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32_fneg: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f32_fneg: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fneg.x = fneg float %x %fneg.y = fneg float %y %result = call float 
@llvm.maximumnum.f32(float %fneg.x, float %fneg.y) @@ -1612,166 +2613,320 @@ define float @v_maximumnum_f32_fneg(float %x, float %y) { } define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) { -; GFX7-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1| -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX11-FAKE16-NEXT: v_max_f16_e32 
v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX11-TRUE16-SDAG: ; %bb.0: +; 
GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; 
GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; 
GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.y = call half @llvm.fabs.f16(half %y) %result = call half @llvm.maximumnum.f16(half %x, half %fabs.y) ret half %result } define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) { -; GFX7-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e64 v1, -|v1| -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l| -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: 
v_maximumnum_f16_fneg_fabs_rhs: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v1| +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -|v1| +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: 
v_maximumnum_f16_fneg_fabs_rhs: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; 
GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.y = call half @llvm.fabs.f16(half %y) %fneg.fabs.y = fneg half %fabs.y %result = call half @llvm.maximumnum.f16(half %x, half %fneg.fabs.y) @@ -1779,83 +2934,160 @@ define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) { } define half @v_maximumnum_f16_fabs(half %x, half %y) { -; GFX7-LABEL: v_maximumnum_f16_fabs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1| -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16_fabs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX8-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f16_fabs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16_fabs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX10-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16_fabs: -; GFX11-TRUE16: ; 
%bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_maximumnum_f16_fabs: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16_fabs: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16_fabs: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; 
GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX8-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| 
+; GFX10-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; 
GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: 
v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call half @llvm.fabs.f16(half %x) %fabs.y = call half @llvm.fabs.f16(half %y) %result = call half @llvm.maximumnum.f16(half %fabs.x, half %fabs.y) @@ -1863,83 +3095,160 @@ define half @v_maximumnum_f16_fabs(half %x, half %y) { } define half @v_maximumnum_f16_fneg(half %x, half %y) { -; GFX7-LABEL: v_maximumnum_f16_fneg: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0 -; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16_fneg: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX8-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f16_fneg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX9-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16_fneg: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: 
v_max_f16_e64 v1, -v1, -v1 -; GFX10-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16_fneg: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_maximumnum_f16_fneg: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16_fneg: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16_fneg: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -v1, -v1 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, -v0, -v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: 
s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v1, -v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX8-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX10-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX10-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg: +; 
GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1 +; 
GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %fneg.x = fneg half %x %fneg.y = fneg half %y %result = call half @llvm.maximumnum.f16(half %fneg.x, half %fneg.y) @@ -1947,166 +3256,313 @@ define half @v_maximumnum_f16_fneg(half %x, half %y) { } define double @v_maximumnum_f64_fneg(double %x, double %y) { -; GFX7-LABEL: v_maximumnum_f64_fneg: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX7-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f64_fneg: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX8-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f64_fneg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: 
v_maximumnum_f64_fneg: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX10-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f64_fneg: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f64_fneg: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] -; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f64_fneg: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f64_fneg: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], 
-v[0:1], -v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f64_fneg: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f64_fneg: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f64_fneg: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 
v[0:1], -v[0:1], -v[0:1] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f64_fneg: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fneg.x = fneg double %x %fneg.y = fneg double %y %result = call double @llvm.maximumnum.f64(double %fneg.x, double %fneg.y) ret double %result -} - -define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) { -; GFX7-LABEL: v_maximumnum_v2f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v2f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v2f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v2f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v2f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v2f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v2f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +} + +define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) { +; GFX7-SDAG-LABEL: v_maximumnum_v2f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v2f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v2f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v2f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v2, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v2f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v2f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v2f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v2f16: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-GISEL-NEXT: s_nop 0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v2f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v2f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v2f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v2f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v2f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; 
GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v2f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %x, <2 x half> %y) ret <2 x half> %result } define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { -; GFX7-LABEL: v_maximumnum_v2f16_nnan: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v2f16_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: 
v_maximumnum_v2f16_nnan: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v2f16_nnan: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v2f16_nnan: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v2f16_nnan: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_maximumnum_v2f16_nnan: ; GFX9: ; %bb.0: @@ -2140,135 +3596,243 @@ define <2 x half> @v_maximumnum_v2f16_nnan(<2 
x half> %x, <2 x half> %y) { } define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) { -; GFX7-LABEL: v_maximumnum_v3f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v3f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v3f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX900-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: 
v_maximumnum_v3f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX950-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v2 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v3f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v3f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v3f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v3f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v3f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v3f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, 
v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v3f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v4, v5 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v3f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_v3f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v3f16: +; GFX950-SDAG: ; %bb.0: +; 
GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v3f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v3f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v3f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v3f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: 
v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v3f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v3f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <3 x half> @llvm.maximumnum.v3f16(<3 x half> %x, <3 x half> %y) ret <3 x half> %result } define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { -; GFX7-LABEL: v_maximumnum_v3f16_nnan: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v3f16_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v3f16_nnan: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v3f16_nnan: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: 
v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v3f16_nnan: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v3f16_nnan: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_maximumnum_v3f16_nnan: ; GFX9: ; %bb.0: @@ -2306,151 +3870,273 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { } define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) { -; GFX7-LABEL: v_maximumnum_v4f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: 
v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX7-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v4f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v4f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX900-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: 
v_pk_max_f16 v1, v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v4f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX950-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v2 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v4f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v4f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v4f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v4f16: +; GFX7-SDAG: ; 
%bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v4f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v4f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v4f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v4, v6 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; 
GFX8-GISEL-NEXT: v_max_f16_e32 v2, v5, v7 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v4f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_v4f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v4f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v4f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX10-SDAG-NEXT: 
v_pk_max_f16 v1, v1, v3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v4f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v4f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v4f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v4f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: 
v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v4f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %x, <4 x half> %y) ret <4 x half> %result } define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { -; GFX7-LABEL: v_maximumnum_v4f16_nnan: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v6 -; 
GFX7-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v4f16_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v4f16_nnan: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v4f16_nnan: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 
+; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v4f16_nnan: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v4f16_nnan: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v1, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_maximumnum_v4f16_nnan: ; GFX9: ; %bb.0: @@ -2488,1349 +4174,2545 @@ define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { } define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) { -; GFX7-LABEL: v_maximumnum_v6f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 -; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 -; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v6 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v7 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v8 -; GFX7-NEXT: v_max_f32_e32 v3, v3, v9 -; GFX7-NEXT: v_max_f32_e32 v4, v4, v10 -; GFX7-NEXT: v_max_f32_e32 v5, v5, v11 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v6f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v2, v2, v5 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v4 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v6f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v3 -; GFX900-NEXT: v_pk_max_f16 v3, v4, v4 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX900-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v6f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v3 -; GFX950-NEXT: v_pk_max_f16 v3, v4, v4 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX950-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v3 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v6f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v3 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v4 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v5 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v6f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_pk_max_f16 v1, v1, v4 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v5 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v6f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v3 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v4 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v5 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v6f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: 
v_cvt_f16_f32_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v6 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v7 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v8 +; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v9 +; GFX7-SDAG-NEXT: v_max_f32_e32 v4, v4, v10 +; GFX7-SDAG-NEXT: v_max_f32_e32 v5, v5, v11 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v6f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v8 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v9 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v10 +; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX7-GISEL-NEXT: 
v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v11 +; GFX7-GISEL-NEXT: v_max_f32_e32 v4, v4, v6 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v5, v5, v7 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v6f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, 
v0, v3 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v6 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v6f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v4, v4 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v5, v5 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v6, v9 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v7, v10 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v8, v11 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v6, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v6f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v4, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v3 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_v6f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v4 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v5 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v6f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v4, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v3 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v6f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, 
v5, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v6f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v5 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v6f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v5 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v6f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v3 +; 
GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v4 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v5 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v6f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v3 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v5 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v6f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v4 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v5 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <6 x half> @llvm.maximumnum.v6f16(<6 x half> %x, <6 x half> %y) ret 
<6 x half> %result } define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) { -; GFX7-LABEL: v_maximumnum_v8f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 -; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 -; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 -; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 -; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 -; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v8 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v9 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v10 -; GFX7-NEXT: v_max_f32_e32 v3, v3, v11 -; GFX7-NEXT: v_max_f32_e32 v4, v4, v12 -; GFX7-NEXT: v_max_f32_e32 v5, v5, v13 -; GFX7-NEXT: v_max_f32_e32 v6, v6, v14 -; GFX7-NEXT: v_max_f32_e32 v7, v7, v15 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v8f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v7, v7, v7 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v6, v6, v6 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v3, v3, v7 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v6 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v5 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v4 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v11 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v9 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v8 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v8f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v4 -; GFX900-NEXT: v_pk_max_f16 v4, v5, v5 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v4 -; GFX900-NEXT: v_pk_max_f16 v4, v6, v6 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX900-NEXT: v_pk_max_f16 v4, v7, v7 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v4 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v8f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v4 -; GFX950-NEXT: v_pk_max_f16 v4, v5, v5 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v4 -; GFX950-NEXT: v_pk_max_f16 v4, v6, v6 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX950-NEXT: v_pk_max_f16 v4, v7, v7 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v4 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v8f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v4 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v5 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v6 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v7 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v8f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX11-NEXT: 
v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v4 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v5 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-NEXT: v_pk_max_f16 v3, v3, v7 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v8f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v4 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v5 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v6 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v7 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v8f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 +; 
GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v9 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_max_f32_e32 v4, v4, v12 +; GFX7-SDAG-NEXT: v_max_f32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_max_f32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_max_f32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v8f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v10 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v11 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, 
v12 +; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v13 +; GFX7-GISEL-NEXT: v_max_f32_e32 v4, v4, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v14 +; GFX7-GISEL-NEXT: v_max_f32_e32 v5, v5, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v15 +; GFX7-GISEL-NEXT: v_max_f32_e32 v6, v6, v8 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v7, v7, v9 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v8f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v11, v10 dst_sel:WORD_1 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v7 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v6 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v11 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v9 +; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v8 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v8f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v4, v4 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v5, v5 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v14, v6, v6 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v15, v7, v7 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v8, v12 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v9, v13 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v10, v14 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v11, v15 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v5, v2 +; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v6, v3 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v8f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 +; 
GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v4 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_v8f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v4 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v5 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v6 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v7 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v8f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v4 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v8f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v7, 
v7, v7 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v6 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v7 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v8f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v5 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v6 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v7 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v8f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v6 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v7 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v8f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; 
GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v5 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v6 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v7 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v8f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v6 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v7 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v8f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; 
GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v5 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v6 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v7 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %x, <8 x half> %y) ret <8 x half> %result } -define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) { -; GFX7-LABEL: v_maximumnum_v16f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v17 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v20 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v18 -; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v21 -; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; 
GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v19 -; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v22 -; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v23 -; GFX7-NEXT: v_max_f32_e32 v4, v4, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19 -; GFX7-NEXT: v_max_f32_e32 v5, v5, v18 -; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 -; GFX7-NEXT: v_max_f32_e32 v3, v3, v16 -; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32 -; GFX7-NEXT: v_max_f32_e32 v6, v6, v19 -; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v24 -; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 -; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v25 -; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 -; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v26 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 -; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 -; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 -; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 -; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19 -; GFX7-NEXT: v_max_f32_e32 v7, v7, v20 -; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 -; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v27 -; GFX7-NEXT: v_max_f32_e32 v8, v8, v17 -; GFX7-NEXT: v_max_f32_e32 v9, v9, v18 -; GFX7-NEXT: v_max_f32_e32 v10, v10, v19 -; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12 -; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v28 -; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13 -; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v29 -; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14 -; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v30 -; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15 -; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 -; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12 -; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13 -; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 -; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14 -; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19 -; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15 -; GFX7-NEXT: v_max_f32_e32 v11, v11, 
v20 -; GFX7-NEXT: v_max_f32_e32 v12, v12, v17 -; GFX7-NEXT: v_max_f32_e32 v13, v13, v18 -; GFX7-NEXT: v_max_f32_e32 v14, v14, v19 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_max_f32_e32 v15, v15, v16 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v16f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: 
v_max_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v15, v15, v15 -; GFX8-NEXT: v_max_f16_e32 v7, v7, v7 -; GFX8-NEXT: v_max_f16_e32 v14, v14, v14 -; GFX8-NEXT: v_max_f16_e32 v6, v6, v6 -; GFX8-NEXT: v_max_f16_e32 v13, v13, v13 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX8-NEXT: v_max_f16_e32 v12, v12, v12 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX8-NEXT: v_max_f16_e32 v11, v11, v11 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v10, v10, v10 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v9, v9, v9 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v8, v8, v8 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v7, v7, v15 -; GFX8-NEXT: v_max_f16_e32 v6, v6, v14 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v13 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v12 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v11 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v10 -; GFX8-NEXT: v_max_f16_e32 v1, v1, 
v9 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v23 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v22 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v21 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v20 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v19 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v18 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v17 -; GFX8-NEXT: v_or_b32_e32 v7, v7, v16 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v16f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v9, v9 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v10, v10 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v11, v11 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v12, v12 -; GFX900-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX900-NEXT: v_pk_max_f16 v4, v4, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v13, v13 -; GFX900-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX900-NEXT: v_pk_max_f16 v5, v5, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v14, v14 -; GFX900-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX900-NEXT: v_pk_max_f16 v6, v6, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v15, v15 -; GFX900-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX900-NEXT: v_pk_max_f16 v7, v7, v8 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v16f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v9, v9 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v10, v10 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v3 -; 
GFX950-NEXT: v_pk_max_f16 v2, v2, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v11, v11 -; GFX950-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v12, v12 -; GFX950-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX950-NEXT: v_pk_max_f16 v4, v4, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v13, v13 -; GFX950-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX950-NEXT: v_pk_max_f16 v5, v5, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v14, v14 -; GFX950-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX950-NEXT: v_pk_max_f16 v6, v6, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v15, v15 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_max_f16 v7, v7, v8 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v16f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v8 -; GFX10-NEXT: v_pk_max_f16 v8, v11, v11 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v9 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v10 -; GFX10-NEXT: v_pk_max_f16 v9, v12, v12 -; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX10-NEXT: v_pk_max_f16 v10, v13, v13 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX10-NEXT: v_pk_max_f16 v11, v14, v14 -; GFX10-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX10-NEXT: v_pk_max_f16 v12, v15, v15 -; GFX10-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v8 -; GFX10-NEXT: v_pk_max_f16 v4, v4, v9 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v10 -; GFX10-NEXT: v_pk_max_f16 v6, v6, v11 -; GFX10-NEXT: v_pk_max_f16 v7, v7, v12 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v16f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v9, 
v9, v9 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v8 -; GFX11-NEXT: v_pk_max_f16 v8, v11, v11 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v9 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v10 -; GFX11-NEXT: v_pk_max_f16 v9, v12, v12 -; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX11-NEXT: v_pk_max_f16 v10, v13, v13 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX11-NEXT: v_pk_max_f16 v11, v14, v14 -; GFX11-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX11-NEXT: v_pk_max_f16 v12, v15, v15 -; GFX11-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v8 -; GFX11-NEXT: v_pk_max_f16 v4, v4, v9 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v10 -; GFX11-NEXT: v_pk_max_f16 v6, v6, v11 -; GFX11-NEXT: v_pk_max_f16 v7, v7, v12 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v16f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v8 -; GFX12-NEXT: v_pk_max_num_f16 v8, v11, v11 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v9 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v10 -; GFX12-NEXT: v_pk_max_num_f16 v9, v12, v12 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 -; GFX12-NEXT: v_pk_max_num_f16 v10, v13, v13 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 -; GFX12-NEXT: v_pk_max_num_f16 v11, v14, v14 -; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6 -; GFX12-NEXT: v_pk_max_num_f16 v12, v15, v15 -; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v8 -; 
GFX12-NEXT: v_pk_max_num_f16 v4, v4, v9 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v10 -; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v11 -; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v12 -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %x, <16 x half> %y) - ret <16 x half> %result -} - -define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) { -; GFX7-LABEL: v_maximumnum_v32f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 -; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 -; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 -; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 -; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 -; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 -; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 -; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12 -; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13 -; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14 -; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15 -; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12 -; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13 -; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14 -; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v17 -; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v18 -; GFX7-NEXT: v_cvt_f16_f32_e32 
v19, v19 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 -; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19 -; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v20 -; GFX7-NEXT: v_cvt_f16_f32_e32 v21, v21 -; GFX7-NEXT: v_cvt_f16_f32_e32 v22, v22 -; GFX7-NEXT: v_cvt_f16_f32_e32 v23, v23 -; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX7-NEXT: v_cvt_f32_f16_e32 v21, v21 -; GFX7-NEXT: v_cvt_f32_f16_e32 v22, v22 -; GFX7-NEXT: v_cvt_f32_f16_e32 v23, v23 -; GFX7-NEXT: v_cvt_f16_f32_e32 v24, v24 -; GFX7-NEXT: v_cvt_f16_f32_e32 v25, v25 -; GFX7-NEXT: v_cvt_f16_f32_e32 v26, v26 -; GFX7-NEXT: v_cvt_f16_f32_e32 v27, v27 -; GFX7-NEXT: v_cvt_f32_f16_e32 v24, v24 -; GFX7-NEXT: v_cvt_f32_f16_e32 v25, v25 -; GFX7-NEXT: v_cvt_f32_f16_e32 v26, v26 -; GFX7-NEXT: v_cvt_f32_f16_e32 v27, v27 -; GFX7-NEXT: v_cvt_f16_f32_e32 v28, v28 -; GFX7-NEXT: v_cvt_f16_f32_e32 v29, v29 -; GFX7-NEXT: v_cvt_f16_f32_e32 v30, v30 -; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128 -; GFX7-NEXT: v_cvt_f32_f16_e32 v28, v28 -; GFX7-NEXT: v_cvt_f32_f16_e32 v29, v29 -; GFX7-NEXT: v_cvt_f32_f16_e32 v30, v30 -; GFX7-NEXT: s_waitcnt vmcnt(1) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 -; GFX7-NEXT: s_waitcnt vmcnt(1) -; GFX7-NEXT: v_cvt_f16_f32_e32 v32, v32 -; GFX7-NEXT: v_cvt_f32_f16_e32 v32, v32 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: 
v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v3, v3, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v4, v4, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v5, v5, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v6, v6, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v7, v7, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v8, v8, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v9, v9, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v10, v10, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v11, v11, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, 
v31 -; GFX7-NEXT: v_max_f32_e32 v12, v12, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v13, v13, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v14, v14, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v15, v15, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v16, v16, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v17, v17, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v18, v18, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v19, v19, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v20, v20, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; 
GFX7-NEXT: v_max_f32_e32 v21, v21, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v22, v22, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v23, v23, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v24, v24, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v25, v25, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v26, v26, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v27, v27, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v28, v28, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v29, v29, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; 
GFX7-NEXT: v_max_f32_e32 v30, v30, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_max_f32_e32 v31, v31, v32 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v32f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX8-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; 
GFX8-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v17, v17, v17 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v38, v39, v38 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v1, v1, v17 -; GFX8-NEXT: v_max_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX8-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v43, off, s[0:3], s32 
offset:36 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX8-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v15, v15, v15 -; GFX8-NEXT: v_max_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v30, v30, v30 -; GFX8-NEXT: v_max_f16_e32 v14, v14, v14 -; GFX8-NEXT: v_max_f16_e32 v29, v29, v29 -; GFX8-NEXT: v_max_f16_e32 v13, v13, v13 -; 
GFX8-NEXT: v_max_f16_e32 v28, v28, v28 -; GFX8-NEXT: v_max_f16_e32 v12, v12, v12 -; GFX8-NEXT: v_max_f16_e32 v27, v27, v27 -; GFX8-NEXT: v_max_f16_e32 v11, v11, v11 -; GFX8-NEXT: v_max_f16_e32 v26, v26, v26 -; GFX8-NEXT: v_max_f16_e32 v10, v10, v10 -; GFX8-NEXT: v_max_f16_e32 v25, v25, v25 -; GFX8-NEXT: v_max_f16_e32 v9, v9, v9 -; GFX8-NEXT: v_max_f16_e32 v24, v24, v24 -; GFX8-NEXT: v_max_f16_e32 v8, v8, v8 -; GFX8-NEXT: v_max_f16_e32 v23, v23, v23 -; GFX8-NEXT: v_max_f16_e32 v7, v7, v7 -; GFX8-NEXT: v_max_f16_e32 v22, v22, v22 -; GFX8-NEXT: v_max_f16_e32 v6, v6, v6 -; GFX8-NEXT: v_max_f16_e32 v21, v21, v21 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX8-NEXT: v_max_f16_e32 v20, v20, v20 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX8-NEXT: v_max_f16_e32 v19, v19, v19 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v18, v18, v18 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v16, v16, v16 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_e32 v14, v14, v30 -; GFX8-NEXT: v_max_f16_e32 v13, v13, v29 -; GFX8-NEXT: v_max_f16_e32 v12, v12, v28 -; GFX8-NEXT: v_max_f16_e32 v11, v11, v27 -; GFX8-NEXT: v_max_f16_e32 v10, v10, v26 -; GFX8-NEXT: v_max_f16_e32 v9, v9, v25 -; GFX8-NEXT: v_max_f16_e32 v8, v8, v24 -; GFX8-NEXT: v_max_f16_e32 v7, v7, v23 -; GFX8-NEXT: v_max_f16_e32 v6, v6, v22 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v21 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v20 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v19 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v18 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v16 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v33 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v55 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v54 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v53 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v52 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v51 -; GFX8-NEXT: v_or_b32_e32 v7, v7, v50 -; GFX8-NEXT: v_or_b32_e32 v8, v8, v49 -; GFX8-NEXT: v_or_b32_e32 v9, v9, v48 -; GFX8-NEXT: v_or_b32_e32 v10, v10, v39 -; GFX8-NEXT: v_or_b32_e32 v11, v11, v38 -; GFX8-NEXT: 
v_or_b32_e32 v12, v12, v36 -; GFX8-NEXT: v_or_b32_e32 v13, v13, v34 -; GFX8-NEXT: v_or_b32_e32 v14, v14, v32 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v31, v31, v31 -; GFX8-NEXT: v_max_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v15, v15, v31 -; GFX8-NEXT: v_or_b32_e32 v15, v15, v35 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v32f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v16 -; GFX900-NEXT: v_pk_max_f16 v16, v17, v17 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v16 -; GFX900-NEXT: v_pk_max_f16 v16, v18, v18 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v16 -; GFX900-NEXT: v_pk_max_f16 v16, v19, v19 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v16 -; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32 -; GFX900-NEXT: v_pk_max_f16 v17, v20, v20 -; GFX900-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX900-NEXT: v_pk_max_f16 v18, v21, v21 -; GFX900-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX900-NEXT: v_pk_max_f16 v19, v22, v22 -; GFX900-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX900-NEXT: v_pk_max_f16 v20, v23, v23 -; GFX900-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX900-NEXT: v_pk_max_f16 v21, v24, v24 -; GFX900-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX900-NEXT: v_pk_max_f16 v22, v25, v25 -; GFX900-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX900-NEXT: v_pk_max_f16 v23, v26, v26 -; GFX900-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX900-NEXT: v_pk_max_f16 v24, v27, v27 -; GFX900-NEXT: v_pk_max_f16 v11, v11, v11 -; GFX900-NEXT: v_pk_max_f16 v25, v28, v28 -; GFX900-NEXT: v_pk_max_f16 v12, v12, v12 -; GFX900-NEXT: v_pk_max_f16 v26, v29, v29 -; 
GFX900-NEXT: v_pk_max_f16 v13, v13, v13 -; GFX900-NEXT: v_pk_max_f16 v27, v30, v30 -; GFX900-NEXT: v_pk_max_f16 v14, v14, v14 -; GFX900-NEXT: v_pk_max_f16 v15, v15, v15 -; GFX900-NEXT: v_pk_max_f16 v4, v4, v17 -; GFX900-NEXT: v_pk_max_f16 v5, v5, v18 -; GFX900-NEXT: v_pk_max_f16 v6, v6, v19 -; GFX900-NEXT: v_pk_max_f16 v7, v7, v20 -; GFX900-NEXT: v_pk_max_f16 v8, v8, v21 -; GFX900-NEXT: v_pk_max_f16 v9, v9, v22 -; GFX900-NEXT: v_pk_max_f16 v10, v10, v23 -; GFX900-NEXT: v_pk_max_f16 v11, v11, v24 -; GFX900-NEXT: v_pk_max_f16 v12, v12, v25 -; GFX900-NEXT: v_pk_max_f16 v13, v13, v26 -; GFX900-NEXT: v_pk_max_f16 v14, v14, v27 -; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX900-NEXT: v_pk_max_f16 v15, v15, v16 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v32f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: scratch_load_dword v31, off, s32 -; GFX950-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v17, v17, v17 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v18, v18, v18 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v19, v19, v19 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX950-NEXT: v_pk_max_f16 v20, v20, v20 -; GFX950-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX950-NEXT: v_pk_max_f16 v21, v21, v21 -; GFX950-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX950-NEXT: v_pk_max_f16 v22, v22, v22 -; GFX950-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX950-NEXT: v_pk_max_f16 v23, v23, v23 -; GFX950-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX950-NEXT: v_pk_max_f16 v24, v24, v24 -; GFX950-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX950-NEXT: v_pk_max_f16 v25, v25, v25 -; GFX950-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX950-NEXT: v_pk_max_f16 v26, v26, v26 -; GFX950-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX950-NEXT: v_pk_max_f16 v27, v27, v27 -; GFX950-NEXT: v_pk_max_f16 v11, v11, v11 -; GFX950-NEXT: v_pk_max_f16 v28, v28, v28 
-; GFX950-NEXT: v_pk_max_f16 v12, v12, v12 -; GFX950-NEXT: v_pk_max_f16 v29, v29, v29 -; GFX950-NEXT: v_pk_max_f16 v13, v13, v13 -; GFX950-NEXT: v_pk_max_f16 v30, v30, v30 -; GFX950-NEXT: v_pk_max_f16 v14, v14, v14 -; GFX950-NEXT: v_pk_max_f16 v15, v15, v15 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v16 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v17 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v18 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v19 -; GFX950-NEXT: v_pk_max_f16 v4, v4, v20 -; GFX950-NEXT: v_pk_max_f16 v5, v5, v21 -; GFX950-NEXT: v_pk_max_f16 v6, v6, v22 -; GFX950-NEXT: v_pk_max_f16 v7, v7, v23 -; GFX950-NEXT: v_pk_max_f16 v8, v8, v24 -; GFX950-NEXT: v_pk_max_f16 v9, v9, v25 -; GFX950-NEXT: v_pk_max_f16 v10, v10, v26 -; GFX950-NEXT: v_pk_max_f16 v11, v11, v27 -; GFX950-NEXT: v_pk_max_f16 v12, v12, v28 -; GFX950-NEXT: v_pk_max_f16 v13, v13, v29 -; GFX950-NEXT: v_pk_max_f16 v14, v14, v30 -; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v16, v31, v31 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_max_f16 v15, v15, v16 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v32f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX10-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v17, v17, v17 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v18, v18, v18 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v19, v19, v19 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v20, v20, v20 -; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX10-NEXT: v_pk_max_f16 v21, v21, v21 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX10-NEXT: v_pk_max_f16 v22, v22, v22 -; GFX10-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX10-NEXT: v_pk_max_f16 v23, v23, v23 -; GFX10-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX10-NEXT: v_pk_max_f16 v24, v24, v24 -; GFX10-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX10-NEXT: v_pk_max_f16 v25, 
v25, v25 -; GFX10-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX10-NEXT: v_pk_max_f16 v26, v26, v26 -; GFX10-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX10-NEXT: v_pk_max_f16 v27, v27, v27 -; GFX10-NEXT: v_pk_max_f16 v11, v11, v11 -; GFX10-NEXT: v_pk_max_f16 v28, v28, v28 -; GFX10-NEXT: v_pk_max_f16 v12, v12, v12 -; GFX10-NEXT: v_pk_max_f16 v29, v29, v29 -; GFX10-NEXT: v_pk_max_f16 v13, v13, v13 -; GFX10-NEXT: v_pk_max_f16 v30, v30, v30 -; GFX10-NEXT: v_pk_max_f16 v14, v14, v14 -; GFX10-NEXT: v_pk_max_f16 v15, v15, v15 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v16 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v17 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v18 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v19 -; GFX10-NEXT: v_pk_max_f16 v4, v4, v20 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v21 -; GFX10-NEXT: v_pk_max_f16 v6, v6, v22 -; GFX10-NEXT: v_pk_max_f16 v7, v7, v23 -; GFX10-NEXT: v_pk_max_f16 v8, v8, v24 -; GFX10-NEXT: v_pk_max_f16 v9, v9, v25 -; GFX10-NEXT: v_pk_max_f16 v10, v10, v26 -; GFX10-NEXT: v_pk_max_f16 v11, v11, v27 -; GFX10-NEXT: v_pk_max_f16 v12, v12, v28 -; GFX10-NEXT: v_pk_max_f16 v13, v13, v29 -; GFX10-NEXT: v_pk_max_f16 v14, v14, v30 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v16, v31, v31 -; GFX10-NEXT: v_pk_max_f16 v15, v15, v16 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v32f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v17, v17, v17 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v18, v18, v18 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v19, v19, v19 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v20, v20, v20 -; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX11-NEXT: v_pk_max_f16 v21, v21, v21 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX11-NEXT: v_pk_max_f16 v22, v22, v22 -; GFX11-NEXT: v_pk_max_f16 v6, v6, 
v6 -; GFX11-NEXT: v_pk_max_f16 v23, v23, v23 -; GFX11-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX11-NEXT: v_pk_max_f16 v24, v24, v24 -; GFX11-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX11-NEXT: v_pk_max_f16 v25, v25, v25 -; GFX11-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX11-NEXT: v_pk_max_f16 v26, v26, v26 -; GFX11-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX11-NEXT: v_pk_max_f16 v27, v27, v27 -; GFX11-NEXT: v_pk_max_f16 v11, v11, v11 -; GFX11-NEXT: v_pk_max_f16 v28, v28, v28 -; GFX11-NEXT: v_pk_max_f16 v12, v12, v12 -; GFX11-NEXT: v_pk_max_f16 v29, v29, v29 -; GFX11-NEXT: v_pk_max_f16 v13, v13, v13 -; GFX11-NEXT: v_pk_max_f16 v30, v30, v30 -; GFX11-NEXT: v_pk_max_f16 v14, v14, v14 -; GFX11-NEXT: v_pk_max_f16 v15, v15, v15 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v16 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v17 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v18 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v19 -; GFX11-NEXT: v_pk_max_f16 v4, v4, v20 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v21 -; GFX11-NEXT: v_pk_max_f16 v6, v6, v22 -; GFX11-NEXT: v_pk_max_f16 v7, v7, v23 -; GFX11-NEXT: v_pk_max_f16 v8, v8, v24 -; GFX11-NEXT: v_pk_max_f16 v9, v9, v25 -; GFX11-NEXT: v_pk_max_f16 v10, v10, v26 -; GFX11-NEXT: v_pk_max_f16 v11, v11, v27 -; GFX11-NEXT: v_pk_max_f16 v12, v12, v28 -; GFX11-NEXT: v_pk_max_f16 v13, v13, v29 -; GFX11-NEXT: v_pk_max_f16 v14, v14, v30 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v16, v31, v31 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_pk_max_f16 v15, v15, v16 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v32f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: scratch_load_b32 v31, off, s32 -; GFX12-NEXT: v_pk_max_num_f16 v16, v16, v16 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v17, v17, v17 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: 
v_pk_max_num_f16 v18, v18, v18 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v19, v19, v19 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v20, v20, v20 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 -; GFX12-NEXT: v_pk_max_num_f16 v21, v21, v21 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 -; GFX12-NEXT: v_pk_max_num_f16 v22, v22, v22 -; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6 -; GFX12-NEXT: v_pk_max_num_f16 v23, v23, v23 -; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7 -; GFX12-NEXT: v_pk_max_num_f16 v24, v24, v24 -; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8 -; GFX12-NEXT: v_pk_max_num_f16 v25, v25, v25 -; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9 -; GFX12-NEXT: v_pk_max_num_f16 v26, v26, v26 -; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10 -; GFX12-NEXT: v_pk_max_num_f16 v27, v27, v27 -; GFX12-NEXT: v_pk_max_num_f16 v11, v11, v11 -; GFX12-NEXT: v_pk_max_num_f16 v28, v28, v28 -; GFX12-NEXT: v_pk_max_num_f16 v12, v12, v12 -; GFX12-NEXT: v_pk_max_num_f16 v29, v29, v29 -; GFX12-NEXT: v_pk_max_num_f16 v13, v13, v13 -; GFX12-NEXT: v_pk_max_num_f16 v30, v30, v30 -; GFX12-NEXT: v_pk_max_num_f16 v14, v14, v14 -; GFX12-NEXT: v_pk_max_num_f16 v15, v15, v15 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v16 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v17 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v18 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v19 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v20 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v21 -; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v22 -; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v23 -; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v24 -; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v25 -; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v26 -; GFX12-NEXT: v_pk_max_num_f16 v11, v11, v27 -; GFX12-NEXT: v_pk_max_num_f16 v12, v12, v28 -; GFX12-NEXT: v_pk_max_num_f16 v13, v13, v29 -; GFX12-NEXT: v_pk_max_num_f16 v14, v14, v30 -; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v16, v31, v31 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 
-; GFX12-NEXT: v_pk_max_num_f16 v15, v15, v16 -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call <32 x half> @llvm.maximumnum.v32f16(<32 x half> %x, <32 x half> %y) - ret <32 x half> %result -} - -define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) { -; GFX7-LABEL: v_maximumnum_v2f32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v3 -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v2f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3 -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_max_f32_e32 v1, v1, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_v2f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v2f32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v2f32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 -; 
GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v2f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 -; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3 -; GFX12-NEXT: s_setpc_b64 s[30:31] +define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) { +; GFX7-SDAG-LABEL: v_maximumnum_v16f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v17 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v20 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v18 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v21 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; 
GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v19 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v22 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v23 +; GFX7-SDAG-NEXT: v_max_f32_e32 v4, v4, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-SDAG-NEXT: v_max_f32_e32 v5, v5, v18 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v16 +; GFX7-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX7-SDAG-NEXT: v_max_f32_e32 v6, v6, v19 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v24 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v25 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v26 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-SDAG-NEXT: v_max_f32_e32 v7, v7, v20 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v27 +; GFX7-SDAG-NEXT: v_max_f32_e32 v8, v8, v17 +; GFX7-SDAG-NEXT: v_max_f32_e32 v9, v9, v18 +; GFX7-SDAG-NEXT: v_max_f32_e32 v10, v10, v19 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v28 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v29 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v30 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-SDAG-NEXT: 
v_cvt_f32_f16_e32 v20, v20 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-SDAG-NEXT: v_max_f32_e32 v11, v11, v20 +; GFX7-SDAG-NEXT: v_max_f32_e32 v12, v12, v17 +; GFX7-SDAG-NEXT: v_max_f32_e32 v13, v13, v18 +; GFX7-SDAG-NEXT: v_max_f32_e32 v14, v14, v19 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_max_f32_e32 v15, v15, v16 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v16f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v17 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v20 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v18 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v21 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-GISEL-NEXT: v_max_f32_e32 v4, v4, v17 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v19 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v22 +; GFX7-GISEL-NEXT: v_max_f32_e32 v5, v5, v18 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v16 +; GFX7-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX7-GISEL-NEXT: v_max_f32_e32 v6, v6, v19 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, 
v23 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v24 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v25 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v26 +; GFX7-GISEL-NEXT: v_max_f32_e32 v7, v7, v17 +; GFX7-GISEL-NEXT: v_max_f32_e32 v8, v8, v18 +; GFX7-GISEL-NEXT: v_max_f32_e32 v9, v9, v19 +; GFX7-GISEL-NEXT: v_max_f32_e32 v10, v10, v20 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v27 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v28 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v29 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v30 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-GISEL-NEXT: v_max_f32_e32 v11, v11, v17 +; GFX7-GISEL-NEXT: v_max_f32_e32 v12, v12, v18 +; GFX7-GISEL-NEXT: v_max_f32_e32 v13, v13, v19 +; GFX7-GISEL-NEXT: v_max_f32_e32 v14, v14, v20 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-GISEL-NEXT: v_max_f32_e32 v15, v15, v16 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-SDAG-LABEL: v_maximumnum_v16f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, 
v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15 +; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 +; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14 +; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 +; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v15 +; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v14 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v13 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v12 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v11 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v10 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v9 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v8 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v23 
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v22 +; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v21 +; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v20 +; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v19 +; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v18 +; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v17 +; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v16 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v16f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v8, v8 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v9, v9 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v10, v10 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v16, v19 +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; 
GFX8-GISEL-NEXT: v_max_f16_e32 v10, v11, v11 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v17, v8 +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v4, v4 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v12, v12 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v18, v9 +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v5, v5 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v13, v13 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v19, v10 +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v6, v6 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v17, v11 +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v7, v7 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v18, v12 +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v14, v14 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v15, v15 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v19, v18 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v17, v13 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v16, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v8, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v9, v2 +; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v10, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v11, v4 +; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v12, v5 +; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v18, v6 +; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v13, v7 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v16f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v9, v9 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v10, v10 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v12, v12 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v13, v13 +; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v14, v14 +; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v15, v15 +; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, 
v8 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_v16f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX9-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX9-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX9-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX9-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX9-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX9-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX9-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX9-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v8 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v9 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v10 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v11 +; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v12 +; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v13 +; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v14 +; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v15 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v16f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v9, v9 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v10, v10 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX950-SDAG-NEXT: 
v_pk_max_f16 v3, v3, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v12, v12 +; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v13, v13 +; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v14, v14 +; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v15, v15 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v8 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v16f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v8 +; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v9 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v10 +; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v12, v12 +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v13, v13 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v14, v14 +; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v15, v15 +; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v8 +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v9 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v10 +; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v11 +; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v12 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v16f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: 
v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v8 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v9 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v10 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v11 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v12 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v13 +; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v14 +; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v15 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v16f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v8 +; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v9 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v10 +; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v12, v12 +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v13, v13 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v14, v14 +; GFX11-SDAG-NEXT: 
v_pk_max_f16 v6, v6, v6 +; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v15, v15 +; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v8 +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v9 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v10 +; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v11 +; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v12 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v16f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v8 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v9 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v10 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v11 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v12 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v13 +; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v14 +; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v15 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v16f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX12-SDAG-NEXT: 
v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v8 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v11, v11 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v9 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v10 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v8 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v9 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v10 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v11 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v12 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v16f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 
v11, v11, v11 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v8 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v9 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v10 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v11 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v12 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v13 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v14 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v15 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %x, <16 x half> %y) + ret <16 x half> %result +} + +define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) { +; GFX7-SDAG-LABEL: v_maximumnum_v32f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-SDAG-NEXT: 
v_cvt_f32_f16_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v19 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v20 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v21, v21 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v22, v22 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v23, v23 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v21, v21 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v22, v22 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v23, v23 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v24, v24 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v25, v25 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v26, v26 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v27, v27 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v24, v24 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v25, v25 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v26, v26 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v27, v27 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v28, v28 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v29, v29 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v30, v30 +; GFX7-SDAG-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v28, v28 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v29, v29 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v30, v30 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: 
v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v32, v32 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v32, v32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v4, v4, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v5, v5, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v6, v6, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v7, v7, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, 
off, s[0:3], s32 offset:36 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v8, v8, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v9, v9, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v10, v10, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v11, v11, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v12, v12, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v13, v13, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v14, v14, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v15, v15, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v16, v16, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v17, v17, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v18, v18, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v19, v19, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v20, v20, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v21, v21, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v22, v22, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v23, v23, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 +; GFX7-SDAG-NEXT: s_waitcnt 
vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v24, v24, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v25, v25, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v26, v26, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v27, v27, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v28, v28, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v29, v29, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v30, v30, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_max_f32_e32 v31, v31, v32 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v32f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v20 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v21, v21 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v22, v22 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v23, v23 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v24, v24 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v25, v25 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v26, v26 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v27, v27 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v28, v28 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v29, v29 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v30, v30 +; GFX7-GISEL-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v32, v32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; 
GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v4, v4, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v5, v5, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v6, v6, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v7, v7, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v8, v8, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: 
v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v9, v9, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v10, v10, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v11, v11, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v12, v12, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v13, v13, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v14, v14, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v15, v15, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v16, v16, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v16, v16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; 
GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v17, v17, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v17, v17 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v18, v18, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v18, v18 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v19, v19, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v19, v19 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v20, v20, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v20, v20 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v21, v21, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v21, v21 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v22, v22, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v22, v22 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v23, v23, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v23, v23 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v24, v24, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v24, v24 +; GFX7-GISEL-NEXT: s_waitcnt 
vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v25, v25, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v25, v25 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v26, v26, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v26, v26 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v27, v27, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v27, v27 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v28, v28, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v28, v28 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v29, v29, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v29, v29 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v30, v30, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v30, v30 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_max_f32_e32 v31, v31, v32 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v32f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded 
Spill +; GFX8-SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa 
v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v17, v17, v17 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v17 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v40 +; GFX8-SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte 
Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v30, v30, v30 +; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14 +; GFX8-SDAG-NEXT: v_max_f16_e32 v29, v29, v29 +; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13 +; GFX8-SDAG-NEXT: v_max_f16_e32 v28, v28, v28 +; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12 +; GFX8-SDAG-NEXT: v_max_f16_e32 v27, v27, v27 +; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11 
+; GFX8-SDAG-NEXT: v_max_f16_e32 v26, v26, v26 +; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10 +; GFX8-SDAG-NEXT: v_max_f16_e32 v25, v25, v25 +; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9 +; GFX8-SDAG-NEXT: v_max_f16_e32 v24, v24, v24 +; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8 +; GFX8-SDAG-NEXT: v_max_f16_e32 v23, v23, v23 +; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 +; GFX8-SDAG-NEXT: v_max_f16_e32 v22, v22, v22 +; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 +; GFX8-SDAG-NEXT: v_max_f16_e32 v21, v21, v21 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v20, v20, v20 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v19, v19, v19 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v18, v18, v18 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v16, v16, v16 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v30 +; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v29 +; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v28 +; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v27 +; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v26 +; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v25 +; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v24 +; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v23 +; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v22 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v21 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v20 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v19 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v18 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v16 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v33 +; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v55 +; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v54 +; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v53 +; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v52 +; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v51 +; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v50 +; GFX8-SDAG-NEXT: v_or_b32_e32 v8, v8, v49 +; GFX8-SDAG-NEXT: v_or_b32_e32 v9, v9, v48 +; GFX8-SDAG-NEXT: v_or_b32_e32 v10, v10, v39 +; 
GFX8-SDAG-NEXT: v_or_b32_e32 v11, v11, v38 +; GFX8-SDAG-NEXT: v_or_b32_e32 v12, v12, v36 +; GFX8-SDAG-NEXT: v_or_b32_e32 v13, v13, v34 +; GFX8-SDAG-NEXT: v_or_b32_e32 v14, v14, v32 +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v31, v31, v31 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v31 +; GFX8-SDAG-NEXT: v_or_b32_e32 v15, v15, v35 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v32f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v31, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v16, v16 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v16, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v31, v31, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v17, v17 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v17, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v16, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v18, v18 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v18, v18, v18 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v17, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v19, v19 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v19, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v18, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v4, v4 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v20, v20 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v20, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v19, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v20, v5, v5 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v21, v21 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v21, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v20, v20, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v21, v6, v6 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v22, v22 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v22, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v21, v21, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v22, v7, v7 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v23, v23 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v23, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v22, v22, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v23, v8, v8 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v24, v24 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v24, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v23, v23, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v24, v9, v9 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v25, v25 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v25, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v24, v24, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v25, v10, v10 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v26, v26 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v26, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: 
v_max_f16_e32 v25, v25, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v26 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v26, v11, v11 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v27, v27 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v27, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v26, v26, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v27 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v27, v12, v12 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v28, v28 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v28, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v27, v27, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v28 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v28, v13, v13 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v29, v29 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v29, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v28, v28, v32 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v29, v14, v14 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v30, v30 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, 
v14, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: buffer_load_dword v30, off, s[0:3], s32 +; GFX8-GISEL-NEXT: v_max_f16_e32 v29, v29, v32 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v15, v15 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v31, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v16, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v17, v2 +; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v18, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v19, v4 +; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v20, v5 +; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v21, v6 +; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v22, v7 +; GFX8-GISEL-NEXT: v_or_b32_e32 v8, v23, v8 +; GFX8-GISEL-NEXT: v_or_b32_e32 v9, v24, v9 +; GFX8-GISEL-NEXT: v_or_b32_e32 v10, v25, v10 +; GFX8-GISEL-NEXT: v_or_b32_e32 v11, v26, v11 +; GFX8-GISEL-NEXT: v_or_b32_e32 v12, v27, v12 +; GFX8-GISEL-NEXT: v_or_b32_e32 v13, v28, v13 +; GFX8-GISEL-NEXT: v_or_b32_e32 v14, v29, v14 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v33, v30, v30 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v32, v33 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v15, v32, v15 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v32f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v17, v17 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v18, v18 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; 
GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v19, v19 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v16 +; GFX900-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX900-SDAG-NEXT: v_pk_max_f16 v17, v20, v20 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v18, v21, v21 +; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX900-SDAG-NEXT: v_pk_max_f16 v19, v22, v22 +; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX900-SDAG-NEXT: v_pk_max_f16 v20, v23, v23 +; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX900-SDAG-NEXT: v_pk_max_f16 v21, v24, v24 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v22, v25, v25 +; GFX900-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX900-SDAG-NEXT: v_pk_max_f16 v23, v26, v26 +; GFX900-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX900-SDAG-NEXT: v_pk_max_f16 v24, v27, v27 +; GFX900-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX900-SDAG-NEXT: v_pk_max_f16 v25, v28, v28 +; GFX900-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX900-SDAG-NEXT: v_pk_max_f16 v26, v29, v29 +; GFX900-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX900-SDAG-NEXT: v_pk_max_f16 v27, v30, v30 +; GFX900-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX900-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v17 +; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v18 +; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v19 +; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v20 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v21 +; GFX900-SDAG-NEXT: v_pk_max_f16 v9, v9, v22 +; GFX900-SDAG-NEXT: v_pk_max_f16 v10, v10, v23 +; GFX900-SDAG-NEXT: v_pk_max_f16 v11, v11, v24 +; GFX900-SDAG-NEXT: v_pk_max_f16 v12, v12, v25 +; GFX900-SDAG-NEXT: v_pk_max_f16 v13, v13, v26 +; GFX900-SDAG-NEXT: v_pk_max_f16 v14, v14, v27 +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v15, v15, v16 +; 
GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v32f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v16 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v17, v17 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v16 +; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v18, v18 +; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v16 +; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v19, v19 +; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v16 +; GFX900-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX900-GISEL-NEXT: v_pk_max_f16 v17, v20, v20 +; GFX900-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX900-GISEL-NEXT: v_pk_max_f16 v18, v21, v21 +; GFX900-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX900-GISEL-NEXT: v_pk_max_f16 v19, v22, v22 +; GFX900-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX900-GISEL-NEXT: v_pk_max_f16 v20, v23, v23 +; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX900-GISEL-NEXT: v_pk_max_f16 v21, v24, v24 +; GFX900-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX900-GISEL-NEXT: v_pk_max_f16 v22, v25, v25 +; GFX900-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX900-GISEL-NEXT: v_pk_max_f16 v23, v26, v26 +; GFX900-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX900-GISEL-NEXT: v_pk_max_f16 v24, v27, v27 +; GFX900-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX900-GISEL-NEXT: v_pk_max_f16 v25, v28, v28 +; GFX900-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX900-GISEL-NEXT: v_pk_max_f16 v26, v29, v29 +; GFX900-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX900-GISEL-NEXT: v_pk_max_f16 v27, v30, v30 +; GFX900-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v17 +; GFX900-GISEL-NEXT: v_pk_max_f16 v5, 
v5, v18 +; GFX900-GISEL-NEXT: v_pk_max_f16 v6, v6, v19 +; GFX900-GISEL-NEXT: v_pk_max_f16 v7, v7, v20 +; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v8, v21 +; GFX900-GISEL-NEXT: v_pk_max_f16 v9, v9, v22 +; GFX900-GISEL-NEXT: v_pk_max_f16 v10, v10, v23 +; GFX900-GISEL-NEXT: v_pk_max_f16 v11, v11, v24 +; GFX900-GISEL-NEXT: v_pk_max_f16 v12, v12, v25 +; GFX900-GISEL-NEXT: v_pk_max_f16 v13, v13, v26 +; GFX900-GISEL-NEXT: v_pk_max_f16 v14, v14, v27 +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX900-GISEL-NEXT: v_pk_max_f16 v15, v15, v16 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v32f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: scratch_load_dword v31, off, s32 +; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX950-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX950-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX950-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX950-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX950-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX950-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX950-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX950-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX950-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX950-SDAG-NEXT: 
v_pk_max_f16 v12, v12, v12 +; GFX950-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX950-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX950-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX950-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX950-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v16 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v17 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v18 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v19 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v20 +; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v21 +; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v22 +; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v23 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v24 +; GFX950-SDAG-NEXT: v_pk_max_f16 v9, v9, v25 +; GFX950-SDAG-NEXT: v_pk_max_f16 v10, v10, v26 +; GFX950-SDAG-NEXT: v_pk_max_f16 v11, v11, v27 +; GFX950-SDAG-NEXT: v_pk_max_f16 v12, v12, v28 +; GFX950-SDAG-NEXT: v_pk_max_f16 v13, v13, v29 +; GFX950-SDAG-NEXT: v_pk_max_f16 v14, v14, v30 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v15, v15, v16 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v32f16: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: scratch_load_dword v31, off, s32 +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX950-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX950-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX950-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX950-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX950-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX950-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX950-GISEL-NEXT: 
v_pk_max_f16 v12, v12, v12 +; GFX950-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX950-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX950-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX950-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX950-GISEL-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX950-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX950-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX950-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX950-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX950-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX950-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX950-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX950-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX950-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX950-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX950-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX950-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v16 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v17 +; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v18 +; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v19 +; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v20 +; GFX950-GISEL-NEXT: v_pk_max_f16 v5, v5, v21 +; GFX950-GISEL-NEXT: v_pk_max_f16 v6, v6, v22 +; GFX950-GISEL-NEXT: v_pk_max_f16 v7, v7, v23 +; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v8, v24 +; GFX950-GISEL-NEXT: v_pk_max_f16 v9, v9, v25 +; GFX950-GISEL-NEXT: v_pk_max_f16 v10, v10, v26 +; GFX950-GISEL-NEXT: v_pk_max_f16 v11, v11, v27 +; GFX950-GISEL-NEXT: v_pk_max_f16 v12, v12, v28 +; GFX950-GISEL-NEXT: v_pk_max_f16 v13, v13, v29 +; GFX950-GISEL-NEXT: v_pk_max_f16 v14, v14, v30 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX950-GISEL-NEXT: s_nop 0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v15, v15, v16 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v32f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-SDAG-NEXT: 
v_pk_max_f16 v16, v16, v16 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX10-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX10-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX10-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX10-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX10-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX10-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX10-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX10-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX10-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v16 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v17 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v18 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v19 +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v20 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v21 +; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v22 +; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v23 +; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v24 +; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v25 +; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v26 +; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v11, v27 +; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v12, v28 
+; GFX10-SDAG-NEXT: v_pk_max_f16 v13, v13, v29 +; GFX10-SDAG-NEXT: v_pk_max_f16 v14, v14, v30 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX10-SDAG-NEXT: v_pk_max_f16 v15, v15, v16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v32f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX10-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX10-GISEL-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX10-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX10-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX10-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX10-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX10-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX10-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX10-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX10-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX10-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX10-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX10-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX10-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v16 +; GFX10-GISEL-NEXT: v_pk_max_f16 
v1, v1, v17 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v18 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v19 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v20 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v21 +; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v22 +; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v23 +; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v24 +; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v25 +; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v26 +; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v27 +; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v28 +; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v29 +; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v30 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v32f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX11-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX11-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; 
GFX11-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX11-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX11-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX11-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX11-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX11-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX11-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v16 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v17 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v18 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v19 +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v20 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v21 +; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v22 +; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v23 +; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v24 +; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v25 +; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v26 +; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v11, v27 +; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v12, v28 +; GFX11-SDAG-NEXT: v_pk_max_f16 v13, v13, v29 +; GFX11-SDAG-NEXT: v_pk_max_f16 v14, v14, v30 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_max_f16 v15, v15, v16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v32f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX11-GISEL-NEXT: 
v_pk_max_f16 v9, v9, v9 +; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX11-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX11-GISEL-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX11-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX11-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX11-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX11-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX11-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX11-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX11-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX11-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX11-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX11-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX11-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX11-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v16 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v17 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v18 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v19 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v20 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v21 +; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v22 +; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v23 +; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v24 +; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v25 +; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v26 +; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v27 +; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v28 +; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v29 +; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v30 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v32f16: +; GFX12-SDAG: ; %bb.0: +; 
GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v16, v16 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v16 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v17 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v18 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v19 +; 
GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v20 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v21 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v22 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v23 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v24 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v25 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v26 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v27 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v28 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v29 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v30 +; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v16 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v32f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 
v17, v17, v17 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v22, v22, v22 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v16 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v17 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v18 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v19 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v20 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v21 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v22 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v23 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v24 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v25 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v26 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v27 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v28 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v29 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v30 +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v16 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <32 x half> @llvm.maximumnum.v32f16(<32 x half> %x, <32 x half> %y) + ret <32 x half> %result +} + +define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) { +; GFX7-SDAG-LABEL: v_maximumnum_v2f32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v2f32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v2f32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v2f32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_v2f32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; 
GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v2f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v2f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v2f32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v2f32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; 
GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v2f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v2f32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v2f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v2f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; 
GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> %x, <2 x float> %y) ret <2 x float> %result } @@ -3884,87 +6766,190 @@ define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) { } define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) { -; GFX7-LABEL: v_maximumnum_v3f32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v4 -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v5 -; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v3f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4 -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v5 -; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX8-NEXT: v_max_f32_e32 v2, v2, v3 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_v3f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v4, v4 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v3f32: 
-; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX10-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v3f32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0 -; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1 -; GFX11-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4 -; GFX11-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v3f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0 -; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1 -; GFX12-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4 -; GFX12-NEXT: v_max_num_f32_e32 v2, v2, v5 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v3f32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; 
GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v3f32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v3f32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v3f32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX8-GISEL-NEXT: v_max_f32_e32 v2, v2, v5 +; 
GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_v3f32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v3f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v3f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_mov_b32_e32 v6, v3 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v7, v4 +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v5 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v6, v6 +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v7, v7 +; 
GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v3f32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v3f32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v3f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0 +; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1 +; GFX11-SDAG-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4 +; GFX11-SDAG-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX11-GISEL-LABEL: v_maximumnum_v3f32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4 +; GFX11-GISEL-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v3f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4 +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v2, v2, v5 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v3f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4 +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v2, v2, v5 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> %x, <3 x float> %y) ret <3 x float> %result } @@ -4024,101 +7009,218 @@ define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) { } define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) { -; GFX7-LABEL: v_maximumnum_v4f32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5 -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v6 -; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v7 -; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3 -; GFX7-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v4f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v4 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v5 -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v6 -; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX8-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v7 -; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3 -; GFX8-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_v4f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v5, v5 -; 
GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v6, v6 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v7, v7 -; GFX9-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v4f32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v6, v6, v6 -; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX10-NEXT: v_max_f32_e32 v7, v7, v7 -; GFX10-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX10-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX10-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v4f32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 -; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 -; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7 -; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5 -; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v4f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 -; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: 
v_dual_max_num_f32 v1, v1, v1 -; GFX12-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 -; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5 -; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v4f32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v4f32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v6, 1.0, v6 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v7, 1.0, v7 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v4f32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-SDAG-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-SDAG-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v4f32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v6, 1.0, v6 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v7, 1.0, v7 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX8-GISEL-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX8-GISEL-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_v4f32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; 
+; GFX900-GISEL-LABEL: v_maximumnum_v4f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX900-GISEL-NEXT: v_max_f32_e32 v6, v6, v6 +; GFX900-GISEL-NEXT: v_max_f32_e32 v7, v7, v7 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v4f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v4f32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; 
GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v6, v6, v6 +; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-SDAG-NEXT: v_max_f32_e32 v7, v7, v7 +; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v4f32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX10-GISEL-NEXT: v_max_f32_e32 v6, v6, v6 +; GFX10-GISEL-NEXT: v_max_f32_e32 v7, v7, v7 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v4f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 +; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-SDAG-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7 +; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5 +; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v4f32: +; GFX11-GISEL: ; %bb.0: +; 
GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 +; GFX11-GISEL-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5 +; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v4f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v4f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; 
GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %result } @@ -4182,88 +7284,171 @@ define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) { } define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) { -; GFX7-LABEL: v_maximumnum_v2f64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v2f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v2f64: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX900-NEXT: v_max_f64 
v[2:3], v[2:3], v[6:7] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v2f64: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX950-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v2f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v2f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v2f64: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | 
instid1(VALU_DEP_2) -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v2f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v2f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v2f64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v2f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] 
+; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v2f64: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v2f64: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v2f64: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v2f64: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], 
v[4:5] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v2f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v2f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v2f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v2f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: v_max_f64 v[6:7], 
v[6:7], v[6:7] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v2f64: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v2f64: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %x, <2 x double> %y) ret <2 x double> %result } @@ -4319,109 +7504,213 @@ define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> 
%x, <2 x double> %y) { } define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) { -; GFX7-LABEL: v_maximumnum_v3f64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v3f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v3f64: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v3f64: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 
v[6:7], v[6:7], v[6:7] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX950-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX950-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11] -; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[6:7] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v3f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v3f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v3f64: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[6:7], 
v[6:7], v[6:7] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9] -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v3f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v3f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v3f64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v3f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v3f64: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v3f64: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v3f64: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[6:7] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v3f64: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX950-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v3f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v3f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v3f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: 
v_maximumnum_v3f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v3f64: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v3f64: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: 
v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y) ret <3 x double> %result } @@ -4434,179 +7723,304 @@ define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) { ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] ; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v3f64_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_v3f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v3f64_nnan: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v3f64_nnan: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v3f64_nnan: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9] -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11] -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y) - ret <3 x double> %result -} - -define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) { -; GFX7-LABEL: v_maximumnum_v4f64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] -; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] -; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v4f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; 
GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] -; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; +; GFX8-LABEL: v_maximumnum_v3f64_nnan: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_maximumnum_v4f64: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] -; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] -; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v4f64: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX950-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX950-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13] -; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[8:9] -; GFX950-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15] -; GFX950-NEXT: v_max_f64 v[6:7], 
v[6:7], v[6:7] -; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[8:9] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v4f64: +; GFX9-LABEL: v_maximumnum_v3f64_nnan: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_maximumnum_v3f64_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX10-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] -; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_maximumnum_v4f64: +; GFX11-LABEL: v_maximumnum_v3f64_nnan: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX11-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] -; GFX11-NEXT: 
v_max_f64 v[4:5], v[4:5], v[12:13] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; -; GFX12-LABEL: v_maximumnum_v4f64: +; GFX12-LABEL: v_maximumnum_v3f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] -; GFX12-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] -; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[8:9] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[10:11] -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[12:13] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[14:15] +; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[6:7] +; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[8:9] +; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[10:11] ; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y) + ret <3 x double> %result +} + +define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) { +; GFX7-SDAG-LABEL: v_maximumnum_v4f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[10:11], 
v[10:11], v[10:11] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v4f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v4f64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; 
GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v4f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v4f64: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v4f64: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v4f64: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[8:9] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v4f64: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; 
GFX950-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX950-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX950-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v4f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v4f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX10-GISEL-NEXT: v_max_f64 
v[4:5], v[4:5], v[12:13] +; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v4f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v4f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v4f64: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[8:9] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[10:11] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[12:13] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[14:15] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v4f64: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[8:9] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[10:11] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], 
v[12:13] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[14:15] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> %x, <4 x double> %y) ret <4 x double> %result } @@ -4674,97 +8088,183 @@ define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) { } define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 { -; GFX7-LABEL: v_maximumnum_f16_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f16_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f16_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f16_no_ieee: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_maximumnum_f16_no_ieee: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] 
-; -; GFX11-FAKE16-LABEL: v_maximumnum_f16_no_ieee: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_maximumnum_f16_no_ieee: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_maximumnum_f16_no_ieee: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; 
GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; 
GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_maximumnum_f16_no_ieee: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_maximumnum_f16_no_ieee: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; 
GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.maximumnum.f16(half %x, half %y) ret half %result } define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 { -; GFX7-LABEL: v_maximumnum_f16_nan_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f16_nan_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f16_nan_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximumnum_f16_nan_no_ieee: ; GFX8: ; %bb.0: @@ -4820,57 +8320,109 @@ define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 { } define float @v_maximumnum_f32_no_ieee(float %x, float %y) #0 { -; GFX7-LABEL: v_maximumnum_f32_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 
s[30:31] -; -; GFX8-LABEL: v_maximumnum_f32_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f32_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f32_no_ieee: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f32_no_ieee: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f32_no_ieee: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f32_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f32_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f32_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f32_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f32_no_ieee: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f32_no_ieee: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f32_no_ieee: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f32_no_ieee: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: 
v_max_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f32_no_ieee: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f32_no_ieee: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f32_no_ieee: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f32_no_ieee: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.maximumnum.f32(float %x, float %y) ret float %result } @@ -4920,59 +8472,113 @@ define float @v_maximumnum_f32_nnan_no_ieee(float %x, float %y) #0 { } define double @v_maximumnum_f64_no_ieee(double 
%x, double %y) #0 { -; GFX7-LABEL: v_maximumnum_f64_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_f64_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_maximumnum_f64_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_f64_no_ieee: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_f64_no_ieee: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_f64_no_ieee: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] -; 
GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_f64_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_f64_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_f64_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_f64_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_maximumnum_f64_no_ieee: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_maximumnum_f64_no_ieee: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX9-GISEL-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_f64_no_ieee: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_f64_no_ieee: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_f64_no_ieee: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_f64_no_ieee: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_f64_no_ieee: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] +; 
GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_f64_no_ieee: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.maximumnum.f64(double %x, double %y) ret double %result } @@ -5022,106 +8628,199 @@ define double @v_maximumnum_f64_nnan_no_ieee(double %x, double %y) #0 { } define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 { -; GFX7-LABEL: v_maximumnum_v2f16_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v2f16_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, 
v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_maximumnum_v2f16_no_ieee: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_maximumnum_v2f16_no_ieee: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_maximumnum_v2f16_no_ieee: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_maximumnum_v2f16_no_ieee: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_maximumnum_v2f16_no_ieee: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, 
v2, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-GISEL-NEXT: s_nop 0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> 
%x, <2 x half> %y) ret <2 x half> %result } define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) #0 { -; GFX7-LABEL: v_maximumnum_v2f16_nnan_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v2f16_nnan_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v2f16_nnan_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v2f16_nnan_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v2f16_nnan_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v2f16_nnan_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_maximumnum_v2f16_nnan_no_ieee: ; GFX9: ; %bb.0: @@ -5155,34 +8854,60 @@ define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) } define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) #0 { -; GFX7-LABEL: v_maximumnum_v3f16_nnan_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v3f16_nnan_no_ieee: -; GFX8: 
; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_maximumnum_v3f16_nnan_no_ieee: ; GFX9: ; %bb.0: @@ -5220,41 +8945,73 @@ define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) } define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) #0 { -; GFX7-LABEL: v_maximumnum_v4f16_nnan_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX7-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX7-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX7-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_maximumnum_v4f16_nnan_no_ieee: -; GFX8: ; %bb.0: 
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_maximumnum_v4f16_nnan_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX7-SDAG-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_maximumnum_v4f16_nnan_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 +; GFX7-GISEL-NEXT: 
v_max_f32_e32 v1, v1, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 +; GFX7-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_maximumnum_v4f16_nnan_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_maximumnum_v4f16_nnan_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v1, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_maximumnum_v4f16_nnan_no_ieee: ; GFX9: ; %bb.0: @@ -5292,3 +9049,6 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) } attributes #0 = { "amdgpu-ieee"="false" } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GFX900: {{.*}} +; GFX950: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll index b12385d19c617..5cb051d2ab857 100644 --- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll +++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll @@ -1,106 +1,209 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-SDAG,GFX11-TRUE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-GISEL,GFX11-TRUE16-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-SDAG,GFX12-FAKE16-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 
-mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-GISEL,GFX12-FAKE16-GISEL %s define half @v_minimumnum_f16(half %x, half %y) { -; GFX7-LABEL: v_minimumnum_f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f16: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_minimumnum_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_minimumnum_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 -; 
GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_minimumnum_f16: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_minimumnum_f16: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e32 
v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f16: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: 
v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16: 
+; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.minimumnum.f16(half %x, half %y) ret half %result } define half @v_minimumnum_f16_nnan(half %x, half %y) { -; GFX7-LABEL: v_minimumnum_f16_nnan: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_nnan: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_nnan: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimumnum_f16_nnan: ; GFX8: ; %bb.0: @@ -156,13 +259,22 @@ define half @v_minimumnum_f16_nnan(half %x, half %y) { } define half @v_minimumnum_f16_1.0(half %x) { -; GFX7-LABEL: v_minimumnum_f16_1.0: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_1.0: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_1.0: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; 
GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimumnum_f16_1.0: ; GFX8: ; %bb.0: @@ -229,57 +341,109 @@ define half @v_minimumnum_f16_1.0(half %x) { } define float @v_minimumnum_f32(float %x, float %y) { -; GFX7-LABEL: v_minimumnum_f32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: 
v_min_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: 
v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.minimumnum.f32(float %x, float %y) ret float %result } @@ -329,59 +493,113 @@ define float @v_minimumnum_f32_nnan(float %x, float %y) { } define double @v_minimumnum_f64(double %x, double %y) { -; GFX7-LABEL: v_minimumnum_f64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f64: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 
0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: 
v_minimumnum_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f64: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; 
GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f64: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.minimumnum.f64(double %x, double %y) ret double %result } @@ -707,432 +925,820 @@ define double @v_minimumnum_f64_1.0(double %x) { } define half @v_minimumnum_f16_v_s(half %x, half inreg %y) { -; GFX7-LABEL: v_minimumnum_f16_v_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f16_v_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_f16_v_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX900-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX900-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_f16_v_s: -; GFX950: ; %bb.0: -; 
GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX950-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX950-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f16_v_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_minimumnum_f16_v_s: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0 -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_minimumnum_f16_v_s: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_minimumnum_f16_v_s: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, s0, s0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_minimumnum_f16_v_s: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 
-; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s16 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX900-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0 +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s0, s0 +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_v_s: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_v_s: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.minimumnum.f16(half %x, half %y) ret half %result } define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) { -; GFX7-LABEL: v_minimumnum_f16_s_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s16 -; 
GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f16_s_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v0, s17, s17 -; GFX8-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX8-NEXT: v_min_f16_e32 v0, v1, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, s16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, s16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s17 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 +; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 +; 
GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 +; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v0, s1, s1 +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e64 v0, s0, s0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s1, s1 +; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 +; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, s1, s1 +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0 +; 
GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.h, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, s0, s0 +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s1, s1 +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, s1, s1 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, s0, s0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s1, s1 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.h, v0.l +; 
GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1 +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_s_s: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v1, v0 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_s_s: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call half @llvm.minimumnum.f16(half %x, half %y) + ret half %result +} + +define float @v_minimumnum_f32_s_v(float inreg %x, 
float %y) { +; GFX7-SDAG-LABEL: v_minimumnum_f32_s_v: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32_s_v: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f32_s_v: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f32_s_v: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_f32_s_v: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f32_s_v: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f32_s_v: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f32_s_v: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32_s_v: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32_s_v: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32_s_v: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32_s_v: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f32_s_v: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: 
s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32_s_v: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v1, v0 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call float @llvm.minimumnum.f32(float %x, float %y) + ret float %result +} + +define float @v_minimumnum_f32_v_s(float %x, float inreg %y) { +; GFX7-SDAG-LABEL: v_minimumnum_f32_v_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32_v_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f32_v_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: 
v_minimumnum_f32_v_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_f32_v_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f32_v_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f32_v_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f32_v_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32_v_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32_v_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32_v_s: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32_v_s: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f32_v_s: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32_v_s: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call float @llvm.minimumnum.f32(float %x, float %y) + 
ret float %result +} + +define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) { +; GFX7-SDAG-LABEL: v_minimumnum_f32_s_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17 +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32_s_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16 +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f32_s_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17 +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f32_s_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16 +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_f32_s_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v0, s17, s17 +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f32_s_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e64 v0, s16, s16 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s17, s17 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: 
v_minimumnum_f32_s_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v0, s1, s1 +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f32_s_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e64 v0, s0, s0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s1, s1 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32_s_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v0, s17, s17 +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32_s_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e64 v0, s16, s16 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s17, s17 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32_s_s: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v0, s1, s1 +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32_s_s: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, s0, s0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s1, s1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: 
v_minimumnum_f32_s_s: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32_s_s: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s1, s1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call float @llvm.minimumnum.f32(float %x, float %y) + ret float %result +} + +define double @v_minimumnum_f64_s_v(double inreg %x, double %y) { +; GFX7-SDAG-LABEL: v_minimumnum_f64_s_v: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f64_s_v: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f64_s_v: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] 
+; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f64_s_v: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_f64_s_v: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f64_s_v: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f64_s_v: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f64_s_v: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_minimumnum_f16_s_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f16_e64 
v0, s17, s17 -; GFX900-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX900-NEXT: v_min_f16_e32 v0, v1, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_f16_s_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f16_e64 v0, s1, s1 -; GFX950-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX950-NEXT: v_min_f16_e32 v0, v1, v0 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f16_s_s: +; GFX10-LABEL: v_minimumnum_f64_s_v: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v0, s17, s17 -; GFX10-NEXT: v_max_f16_e64 v1, s16, s16 -; GFX10-NEXT: v_min_f16_e32 v0, v1, v0 +; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: v_minimumnum_f16_s_s: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, s1, s1 -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, s0, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.h, v0.l -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_minimumnum_f16_s_s: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, s1, s1 -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, s0, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v1, v0 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_minimumnum_f16_s_s: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, s1, s1 -; GFX12-TRUE16-NEXT: 
v_max_num_f16_e64 v0.h, s0, s0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.h, v0.l -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_minimumnum_f16_s_s: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, s1, s1 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, s0, s0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v1, v0 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] - %result = call half @llvm.minimumnum.f16(half %x, half %y) - ret half %result -} - -define float @v_minimumnum_f32_s_v(float inreg %x, float %y) { -; GFX7-LABEL: v_minimumnum_f32_s_v: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX7-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32_s_v: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX8-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_f32_s_v: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX900-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX900-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_f32_s_v: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX950-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX950-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32_s_v: -; 
GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX10-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32_s_v: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32_s_v: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 -; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v1, v0 -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call float @llvm.minimumnum.f32(float %x, float %y) - ret float %result -} - -define float @v_minimumnum_f32_v_s(float %x, float inreg %y) { -; GFX7-LABEL: v_minimumnum_f32_v_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32_v_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_f32_v_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: 
v_minimumnum_f32_v_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX950-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32_v_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32_v_s: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32_v_s: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call float @llvm.minimumnum.f32(float %x, float %y) - ret float %result -} - -define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) { -; GFX7-LABEL: v_minimumnum_f32_s_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s17 -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX7-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32_s_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, s17 -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, s16 -; GFX8-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; 
GFX900-LABEL: v_minimumnum_f32_s_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f32_e64 v0, s17, s17 -; GFX900-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX900-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_f32_s_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e64 v0, s1, s1 -; GFX950-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX950-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32_s_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v0, s17, s17 -; GFX10-NEXT: v_max_f32_e64 v1, s16, s16 -; GFX10-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32_s_s: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v0, s1, s1 -; GFX11-NEXT: v_max_f32_e64 v1, s0, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v1, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32_s_s: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v0, s1, s1 -; GFX12-NEXT: v_max_num_f32_e64 v1, s0, s0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v1, v0 -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call float @llvm.minimumnum.f32(float %x, float %y) - ret float %result -} - -define double @v_minimumnum_f64_s_v(double inreg %x, double %y) { -; GFX7-LABEL: v_minimumnum_f64_s_v: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX7-NEXT: v_min_f64 v[0:1], v[2:3], 
v[0:1] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f64_s_v: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_f64_s_v: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX900-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_f64_s_v: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX950-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f64_s_v: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f64_s_v: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: v_minimumnum_f64_s_v: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimumnum_f64_s_v: ; GFX12: ; %bb.0: @@ -1151,37 +1757,69 
@@ define double @v_minimumnum_f64_s_v(double inreg %x, double %y) { } define double @v_minimumnum_f64_v_s(double %x, double inreg %y) { -; GFX7-LABEL: v_minimumnum_f64_v_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f64_v_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_f64_v_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_f64_v_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX950-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f64_v_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f64_v_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-SDAG-LABEL: v_minimumnum_f64_v_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f64_v_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_f64_v_s: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f64_v_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f64_v_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f64_v_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; 
GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f64_v_s: ; GFX10: ; %bb.0: @@ -1217,184 +1855,354 @@ define double @v_minimumnum_f64_v_s(double %x, double inreg %y) { } define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) { -; GFX7-LABEL: v_minimumnum_f64_s_s: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] -; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX7-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f64_s_s: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] -; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX8-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_f64_s_s: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] -; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX900-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_f64_s_s: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] -; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX950-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f64_s_s: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] -; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] -; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f64_s_s: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] -; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] -; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f64_s_s: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3] -; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f64_s_s: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f64_s_s: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f64_s_s: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f64_s_s: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_f64_s_s: +; GFX900-SDAG: 
; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f64_s_s: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] +; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f64_s_s: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f64_s_s: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3] +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f64_s_s: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] +; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f64_s_s: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] +; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX11-SDAG-LABEL: v_minimumnum_f64_s_s: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f64_s_s: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f64_s_s: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f64_s_s: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], s[0:1], s[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], s[2:3], s[2:3] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.minimumnum.f64(double %x, double %y) ret double %result } define float 
@v_minimumnum_f32_fabs_rhs(float %x, float %y) { -; GFX7-LABEL: v_minimumnum_f32_fabs_rhs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32_fabs_rhs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f32_fabs_rhs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32_fabs_rhs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32_fabs_rhs: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32_fabs_rhs: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX7-SDAG: ; %bb.0: 
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX10-SDAG-NEXT: 
v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: 
v_max_num_f32_e32 v0, v0, v0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) %result = call float @llvm.minimumnum.f32(float %x, float %fabs.y) ret float %result } define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) { -; GFX7-LABEL: v_minimumnum_f32_fneg_fabs_rhs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v1, -1.0, |v1| -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32_fneg_fabs_rhs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v1, -1.0, |v1| -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f32_fneg_fabs_rhs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32_fneg_fabs_rhs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32_fneg_fabs_rhs: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32_fneg_fabs_rhs: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; 
GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1| +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1| +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1| +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1| +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; 
GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, 
-|v1| +; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) %fneg.fabs.y = fneg float %fabs.y %result = call float @llvm.minimumnum.f32(float %x, float %fneg.fabs.y) @@ -1402,59 +2210,113 @@ define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) { } define float @v_minimumnum_f32_fabs(float %x, float %y) { -; GFX7-LABEL: v_minimumnum_f32_fabs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0| -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32_fabs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e64 v1, 1.0, |v1| -; GFX8-NEXT: v_mul_f32_e64 v0, 1.0, |v0| -; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f32_fabs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32_fabs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, 
|v1|, |v1| -; GFX10-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32_fabs: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX11-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32_fabs: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| -; GFX12-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0| +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32_fabs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0| +; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0| +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f32_fabs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: 
v_mul_f32_e64 v0, 1.0, |v0| +; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX10-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32_fabs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX11-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32_fabs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX11-GISEL-NEXT: 
v_max_f32_e64 v1, |v1|, |v1| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32_fabs: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) %fabs.y = call float @llvm.fabs.f32(float %y) %result = call float @llvm.minimumnum.f32(float %fabs.x, float %fabs.y) @@ -1462,59 +2324,113 @@ define float @v_minimumnum_f32_fabs(float %x, float %y) { } define float @v_minimumnum_f32_fneg(float %x, float %y) { -; GFX7-LABEL: v_minimumnum_f32_fneg: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v1, -1.0, v1 -; GFX7-NEXT: v_mul_f32_e32 v0, -1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32_fneg: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v1, -1.0, v1 -; GFX8-NEXT: v_mul_f32_e32 v0, -1.0, v0 -; GFX8-NEXT: 
v_min_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f32_fneg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX9-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32_fneg: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX10-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32_fneg: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX11-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32_fneg: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e64 v1, -v1, -v1 -; GFX12-NEXT: v_max_num_f32_e64 v0, -v0, -v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX10-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX11-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX11-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1 +; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0 +; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fneg.x = fneg float %x %fneg.y = fneg float %y %result = call float @llvm.minimumnum.f32(float %fneg.x, float %fneg.y) @@ -1522,166 +2438,320 @@ define float @v_minimumnum_f32_fneg(float %x, float %y) { } define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) { -; GFX7-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1| -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; 
GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; 
GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, 
v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.y = call half @llvm.fabs.f16(half %y) %result = call half @llvm.minimumnum.f16(half %x, half %fabs.y) ret half %result } define half 
@v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) { -; GFX7-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e64 v1, -|v1| -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l| -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, 
v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v1| +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -|v1| +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX8-SDAG: ; 
%bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: 
v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| +; 
GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.y = call half @llvm.fabs.f16(half %y) %fneg.fabs.y = fneg half %fabs.y %result = call half @llvm.minimumnum.f16(half %x, half %fneg.fabs.y) @@ -1689,83 +2759,160 @@ define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) { } define half @v_minimumnum_f16_fabs(half %x, half %y) { -; GFX7-LABEL: v_minimumnum_f16_fabs: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1| -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f16_fabs: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX8-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f16_fabs: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f16_fabs: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX10-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_minimumnum_f16_fabs: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h -; 
GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_minimumnum_f16_fabs: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_minimumnum_f16_fabs: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_minimumnum_f16_fabs: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX7-GISEL: ; %bb.0: +; 
GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX8-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX10-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: 
v_max_f16_e64 v0, |v0|, |v0| +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX12-TRUE16-SDAG: ; %bb.0: +; 
GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 
0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call half @llvm.fabs.f16(half %x) %fabs.y = call half @llvm.fabs.f16(half %y) %result = call half @llvm.minimumnum.f16(half %fabs.x, half %fabs.y) @@ -1773,83 +2920,160 @@ define half @v_minimumnum_f16_fabs(half %x, half %y) { } define half @v_minimumnum_f16_fneg(half %x, half %y) { -; GFX7-LABEL: v_minimumnum_f16_fneg: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0 -; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f16_fneg: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX8-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f16_fneg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX9-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f16_fneg: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX10-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_minimumnum_f16_fneg: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: 
v_max_f16_e64 v0.h, -v1.l, -v1.l -; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_minimumnum_f16_fneg: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX11-FAKE16-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_minimumnum_f16_fneg: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l -; GFX12-TRUE16-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_minimumnum_f16_fneg: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v1, -v1, -v1 -; GFX12-FAKE16-NEXT: v_max_num_f16_e64 v0, -v0, -v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v1, -v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX8-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX10-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 
s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, 
v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX12-FAKE16-GISEL: ; %bb.0: +; 
GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %fneg.x = fneg half %x %fneg.y = fneg half %y %result = call half @llvm.minimumnum.f16(half %fneg.x, half %fneg.y) @@ -1857,166 +3081,313 @@ define half @v_minimumnum_f16_fneg(half %x, half %y) { } define double @v_minimumnum_f64_fneg(double %x, double %y) { -; GFX7-LABEL: v_minimumnum_f64_fneg: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX7-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f64_fneg: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX8-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f64_fneg: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f64_fneg: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX10-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f64_fneg: -; 
GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f64_fneg: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] -; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 
v[2:3], -v[2:3], -v[2:3] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX11-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %fneg.x = fneg double %x %fneg.y = fneg double %y %result = call double @llvm.minimumnum.f64(double %fneg.x, double %fneg.y) ret double %result -} - -define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) { -; GFX7-LABEL: v_minimumnum_v2f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: 
v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v2f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v2f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v2f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v2f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v2f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v2f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: 
s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +} + +define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) { +; GFX7-SDAG-LABEL: v_minimumnum_v2f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v2f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v2f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: 
v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v2f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v2, v3 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v2f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v2f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v2f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v2f16: +; GFX950-GISEL: ; %bb.0: +; 
GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-GISEL-NEXT: s_nop 0 +; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v2f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v2f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v2f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v2f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v2f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v2f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y) ret <2 x half> %result } define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { -; GFX7-LABEL: v_minimumnum_v2f16_nnan: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v2f16_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v2f16_nnan: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: 
v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v2f16_nnan: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v2f16_nnan: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v2f16_nnan: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_minimumnum_v2f16_nnan: ; GFX9: ; %bb.0: @@ -2050,135 +3421,243 @@ define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { } define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) { -; GFX7-LABEL: v_minimumnum_v3f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: 
v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v3f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_min_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v3f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX900-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_min_f16 v1, v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v3f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX950-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX950-NEXT: s_nop 
0 -; GFX950-NEXT: v_pk_min_f16 v1, v1, v2 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v3f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v3f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v3f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 -; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v3f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v3f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v3f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v3f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v4, v5 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v3f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_v3f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v3f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: 
v_pk_min_f16 v1, v1, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v3f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v3f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v3f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v3f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX11-GISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v3f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v3f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y) ret <3 x half> %result } define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { -; GFX7-LABEL: v_minimumnum_v3f16_nnan: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, 
v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v3f16_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_min_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v3f16_nnan: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v3f16_nnan: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 
v0, v0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v3f16_nnan: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v3f16_nnan: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_minimumnum_v3f16_nnan: ; GFX9: ; %bb.0: @@ -2216,151 +3695,273 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { } define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) { -; GFX7-LABEL: v_minimumnum_v4f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; 
GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX7-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v4f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v4f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX900-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_min_f16 v1, v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v4f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_min_f16 v0, v0, v2 -; 
GFX950-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_min_f16 v1, v1, v2 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v4f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v4f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v4f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 -; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v4f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; 
GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v4f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v4f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v4f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v4, v6 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v5, v7 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v4f16: +; 
GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_v4f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v4f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v4f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v4f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 
+; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v4f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v4f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v4f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v4f16: +; GFX12-GISEL: ; %bb.0: +; 
GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y) ret <4 x half> %result } define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { -; GFX7-LABEL: v_minimumnum_v4f16_nnan: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX7-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v4f16_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v4f16_nnan: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v4f16_nnan: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: 
v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v4f16_nnan: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v4f16_nnan: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v1, v3 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_minimumnum_v4f16_nnan: ; GFX9: ; %bb.0: @@ -2398,1349 +3999,2545 @@ define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { } define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) { -; GFX7-LABEL: v_minimumnum_v6f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 -; GFX7-NEXT: 
v_cvt_f16_f32_e32 v8, v8 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 -; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v6 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v7 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v8 -; GFX7-NEXT: v_min_f32_e32 v3, v3, v9 -; GFX7-NEXT: v_min_f32_e32 v4, v4, v10 -; GFX7-NEXT: v_min_f32_e32 v5, v5, v11 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v6f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: 
v_max_f16_e32 v4, v4, v4 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_e32 v2, v2, v5 -; GFX8-NEXT: v_min_f16_e32 v1, v1, v4 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v6f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_min_f16 v0, v0, v3 -; GFX900-NEXT: v_pk_max_f16 v3, v4, v4 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX900-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_min_f16 v2, v2, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v6f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_min_f16 v0, v0, v3 -; GFX950-NEXT: v_pk_max_f16 v3, v4, v4 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX950-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_min_f16 v2, v2, v3 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v6f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_min_f16 v0, v0, v3 -; GFX10-NEXT: v_pk_min_f16 v1, v1, v4 -; 
GFX10-NEXT: v_pk_min_f16 v2, v2, v5 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v6f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_min_f16 v0, v0, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_pk_min_f16 v1, v1, v4 -; GFX11-NEXT: v_pk_min_f16 v2, v2, v5 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v6f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v3 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v4 -; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v5 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v6f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: 
v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v6 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v7 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v8 +; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v9 +; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v10 +; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v11 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v6f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v8 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v9 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v10 +; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v11 +; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v6 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v7 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; 
GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v6f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v5 +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v3 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v6 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v6f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v4, v4 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v5, v5 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v6, v6, v9 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v7, v10 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v8, v11 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v6, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v6f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v4, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: 
v_pk_min_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v3 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_v6f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v3 +; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v4 +; GFX9-GISEL-NEXT: v_pk_min_f16 v2, v2, v5 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v6f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v4, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v3 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v6f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v3 +; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 +; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v6f16: +; GFX10-GISEL: 
; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v3 +; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v4 +; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v5 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v6f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 +; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v5 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v6f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v3 +; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v4 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v5 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v6f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v3 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v4 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v5 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v6f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v3 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v4 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v5 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <6 x half> @llvm.minimumnum.v6f16(<6 x half> %x, <6 x half> %y) ret <6 x half> %result } define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) { -; GFX7-LABEL: v_minimumnum_v8f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 
v15, v15 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 -; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 -; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 -; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 -; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 -; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v8 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v9 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v10 -; GFX7-NEXT: v_min_f32_e32 v3, v3, v11 -; GFX7-NEXT: v_min_f32_e32 v4, v4, v12 -; GFX7-NEXT: v_min_f32_e32 v5, v5, v13 -; GFX7-NEXT: v_min_f32_e32 v6, v6, v14 -; GFX7-NEXT: v_min_f32_e32 v7, v7, v15 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v8f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; 
GFX8-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v7, v7, v7 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v6, v6, v6 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_e32 v3, v3, v7 -; GFX8-NEXT: v_min_f16_e32 v2, v2, v6 -; GFX8-NEXT: v_min_f16_e32 v1, v1, v5 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v4 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v11 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v9 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v8 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v8f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_min_f16 v0, v0, v4 -; GFX900-NEXT: v_pk_max_f16 v4, v5, v5 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_min_f16 v1, v1, v4 -; 
GFX900-NEXT: v_pk_max_f16 v4, v6, v6 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX900-NEXT: v_pk_max_f16 v4, v7, v7 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX900-NEXT: v_pk_min_f16 v3, v3, v4 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v8f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_min_f16 v0, v0, v4 -; GFX950-NEXT: v_pk_max_f16 v4, v5, v5 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_min_f16 v1, v1, v4 -; GFX950-NEXT: v_pk_max_f16 v4, v6, v6 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX950-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX950-NEXT: v_pk_max_f16 v4, v7, v7 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_min_f16 v3, v3, v4 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v8f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_min_f16 v0, v0, v4 -; GFX10-NEXT: v_pk_min_f16 v1, v1, v5 -; GFX10-NEXT: v_pk_min_f16 v2, v2, v6 -; GFX10-NEXT: v_pk_min_f16 v3, v3, v7 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v8f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: 
v_pk_min_f16 v0, v0, v4 -; GFX11-NEXT: v_pk_min_f16 v1, v1, v5 -; GFX11-NEXT: v_pk_min_f16 v2, v2, v6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-NEXT: v_pk_min_f16 v3, v3, v7 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v8f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v4 -; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v5 -; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v6 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v7 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v8f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 +; 
GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v9 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v12 +; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v8f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v10 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v11 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v12 +; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v13 +; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-GISEL-NEXT: 
v_cvt_f32_f16_e32 v8, v14 +; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v15 +; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v8 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v9 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v8f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v7 +; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v6 +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v5 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v11 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v9 +; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v8 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v8f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v4, v4 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v5, v5 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: 
v_max_f16_e32 v14, v6, v6 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v15, v7, v7 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v8, v8, v12 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v9, v13 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_min_f16_e32 v5, v10, v14 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_min_f16_e32 v6, v11, v15 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v5, v2 +; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v6, v3 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v8f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v4 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_v8f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v4 +; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v5 +; GFX9-GISEL-NEXT: v_pk_min_f16 v2, v2, v6 +; GFX9-GISEL-NEXT: v_pk_min_f16 v3, v3, v7 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v8f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v4 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v8f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 +; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v5 +; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v6 +; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v7 +; GFX10-SDAG-NEXT: s_setpc_b64 
s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v8f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v4 +; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v5 +; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v6 +; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v7 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v8f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 +; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v5 +; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v6 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v7 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v8f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v4 +; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v5 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v6 +; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v7 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v8f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v4 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v5 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v6 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v7 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v8f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v4 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v5 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v6 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v7 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %x, <8 x half> %y) ret <8 x half> %result } -define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) { -; GFX7-LABEL: v_minimumnum_v16f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v17 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v20 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v18 -; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v21 -; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v19 -; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v22 -; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v23 -; GFX7-NEXT: v_min_f32_e32 v4, v4, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 
v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19 -; GFX7-NEXT: v_min_f32_e32 v5, v5, v18 -; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 -; GFX7-NEXT: v_min_f32_e32 v3, v3, v16 -; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32 -; GFX7-NEXT: v_min_f32_e32 v6, v6, v19 -; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v24 -; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 -; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v25 -; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 -; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v26 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 -; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 -; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 -; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 -; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19 -; GFX7-NEXT: v_min_f32_e32 v7, v7, v20 -; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 -; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v27 -; GFX7-NEXT: v_min_f32_e32 v8, v8, v17 -; GFX7-NEXT: v_min_f32_e32 v9, v9, v18 -; GFX7-NEXT: v_min_f32_e32 v10, v10, v19 -; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12 -; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v28 -; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13 -; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v29 -; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14 -; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v30 -; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15 -; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 -; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12 -; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13 -; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 -; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14 -; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19 -; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15 -; GFX7-NEXT: v_min_f32_e32 v11, v11, v20 -; GFX7-NEXT: v_min_f32_e32 v12, v12, v17 -; GFX7-NEXT: v_min_f32_e32 v13, v13, v18 -; GFX7-NEXT: v_min_f32_e32 v14, v14, v19 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_min_f32_e32 
v15, v15, v16 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v16f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v15, v15, v15 -; GFX8-NEXT: v_max_f16_e32 v7, v7, v7 -; GFX8-NEXT: v_max_f16_e32 v14, v14, v14 -; GFX8-NEXT: v_max_f16_e32 v6, v6, v6 -; GFX8-NEXT: v_max_f16_e32 v13, v13, v13 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX8-NEXT: v_max_f16_e32 v12, v12, v12 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX8-NEXT: v_max_f16_e32 v11, v11, v11 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v10, v10, v10 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v9, v9, v9 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v8, v8, v8 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_e32 v7, v7, v15 -; GFX8-NEXT: v_min_f16_e32 v6, v6, v14 -; GFX8-NEXT: v_min_f16_e32 v5, v5, v13 -; GFX8-NEXT: v_min_f16_e32 v4, v4, v12 -; GFX8-NEXT: v_min_f16_e32 v3, v3, v11 -; GFX8-NEXT: v_min_f16_e32 v2, v2, v10 -; GFX8-NEXT: v_min_f16_e32 v1, v1, v9 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v23 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v22 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v21 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v20 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v19 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v18 -; 
GFX8-NEXT: v_or_b32_e32 v6, v6, v17 -; GFX8-NEXT: v_or_b32_e32 v7, v7, v16 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v16f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_min_f16 v0, v0, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v9, v9 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_min_f16 v1, v1, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v10, v10 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_min_f16 v2, v2, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v11, v11 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX900-NEXT: v_pk_min_f16 v3, v3, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v12, v12 -; GFX900-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX900-NEXT: v_pk_min_f16 v4, v4, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v13, v13 -; GFX900-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX900-NEXT: v_pk_min_f16 v5, v5, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v14, v14 -; GFX900-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX900-NEXT: v_pk_min_f16 v6, v6, v8 -; GFX900-NEXT: v_pk_max_f16 v8, v15, v15 -; GFX900-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX900-NEXT: v_pk_min_f16 v7, v7, v8 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v16f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_min_f16 v0, v0, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v9, v9 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_min_f16 v1, v1, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v10, v10 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX950-NEXT: v_pk_min_f16 v2, v2, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v11, v11 -; GFX950-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX950-NEXT: v_pk_min_f16 v3, v3, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v12, v12 -; GFX950-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX950-NEXT: v_pk_min_f16 v4, v4, v8 
-; GFX950-NEXT: v_pk_max_f16 v8, v13, v13 -; GFX950-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX950-NEXT: v_pk_min_f16 v5, v5, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v14, v14 -; GFX950-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX950-NEXT: v_pk_min_f16 v6, v6, v8 -; GFX950-NEXT: v_pk_max_f16 v8, v15, v15 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_min_f16 v7, v7, v8 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v16f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_min_f16 v0, v0, v8 -; GFX10-NEXT: v_pk_max_f16 v8, v11, v11 -; GFX10-NEXT: v_pk_min_f16 v1, v1, v9 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_min_f16 v2, v2, v10 -; GFX10-NEXT: v_pk_max_f16 v9, v12, v12 -; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX10-NEXT: v_pk_max_f16 v10, v13, v13 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX10-NEXT: v_pk_max_f16 v11, v14, v14 -; GFX10-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX10-NEXT: v_pk_max_f16 v12, v15, v15 -; GFX10-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX10-NEXT: v_pk_min_f16 v3, v3, v8 -; GFX10-NEXT: v_pk_min_f16 v4, v4, v9 -; GFX10-NEXT: v_pk_min_f16 v5, v5, v10 -; GFX10-NEXT: v_pk_min_f16 v6, v6, v11 -; GFX10-NEXT: v_pk_min_f16 v7, v7, v12 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v16f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_min_f16 v0, v0, v8 -; GFX11-NEXT: v_pk_max_f16 v8, v11, v11 -; GFX11-NEXT: v_pk_min_f16 v1, v1, v9 -; GFX11-NEXT: v_pk_max_f16 v3, v3, 
v3 -; GFX11-NEXT: v_pk_min_f16 v2, v2, v10 -; GFX11-NEXT: v_pk_max_f16 v9, v12, v12 -; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX11-NEXT: v_pk_max_f16 v10, v13, v13 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX11-NEXT: v_pk_max_f16 v11, v14, v14 -; GFX11-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX11-NEXT: v_pk_max_f16 v12, v15, v15 -; GFX11-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX11-NEXT: v_pk_min_f16 v3, v3, v8 -; GFX11-NEXT: v_pk_min_f16 v4, v4, v9 -; GFX11-NEXT: v_pk_min_f16 v5, v5, v10 -; GFX11-NEXT: v_pk_min_f16 v6, v6, v11 -; GFX11-NEXT: v_pk_min_f16 v7, v7, v12 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v16f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v8 -; GFX12-NEXT: v_pk_max_num_f16 v8, v11, v11 -; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v9 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v10 -; GFX12-NEXT: v_pk_max_num_f16 v9, v12, v12 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 -; GFX12-NEXT: v_pk_max_num_f16 v10, v13, v13 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 -; GFX12-NEXT: v_pk_max_num_f16 v11, v14, v14 -; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6 -; GFX12-NEXT: v_pk_max_num_f16 v12, v15, v15 -; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7 -; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v8 -; GFX12-NEXT: v_pk_min_num_f16 v4, v4, v9 -; GFX12-NEXT: v_pk_min_num_f16 v5, v5, v10 -; GFX12-NEXT: v_pk_min_num_f16 v6, v6, v11 -; GFX12-NEXT: v_pk_min_num_f16 v7, v7, v12 -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %x, <16 x 
half> %y) - ret <16 x half> %result -} - -define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) { -; GFX7-LABEL: v_minimumnum_v32f16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v8, v8 -; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v9 -; GFX7-NEXT: v_cvt_f16_f32_e32 v10, v10 -; GFX7-NEXT: v_cvt_f16_f32_e32 v11, v11 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v8 -; GFX7-NEXT: v_cvt_f32_f16_e32 v9, v9 -; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v10 -; GFX7-NEXT: v_cvt_f32_f16_e32 v11, v11 -; GFX7-NEXT: v_cvt_f16_f32_e32 v12, v12 -; GFX7-NEXT: v_cvt_f16_f32_e32 v13, v13 -; GFX7-NEXT: v_cvt_f16_f32_e32 v14, v14 -; GFX7-NEXT: v_cvt_f16_f32_e32 v15, v15 -; GFX7-NEXT: v_cvt_f32_f16_e32 v12, v12 -; GFX7-NEXT: v_cvt_f32_f16_e32 v13, v13 -; GFX7-NEXT: v_cvt_f32_f16_e32 v14, v14 -; GFX7-NEXT: v_cvt_f32_f16_e32 v15, v15 -; GFX7-NEXT: v_cvt_f16_f32_e32 v16, v16 -; GFX7-NEXT: v_cvt_f16_f32_e32 v17, v17 -; GFX7-NEXT: v_cvt_f16_f32_e32 v18, v18 -; GFX7-NEXT: v_cvt_f16_f32_e32 v19, v19 -; GFX7-NEXT: v_cvt_f32_f16_e32 v16, v16 -; GFX7-NEXT: v_cvt_f32_f16_e32 v17, v17 -; GFX7-NEXT: v_cvt_f32_f16_e32 v18, v18 -; GFX7-NEXT: v_cvt_f32_f16_e32 v19, v19 -; GFX7-NEXT: v_cvt_f16_f32_e32 v20, v20 -; GFX7-NEXT: v_cvt_f16_f32_e32 v21, v21 -; GFX7-NEXT: 
v_cvt_f16_f32_e32 v22, v22 -; GFX7-NEXT: v_cvt_f16_f32_e32 v23, v23 -; GFX7-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX7-NEXT: v_cvt_f32_f16_e32 v21, v21 -; GFX7-NEXT: v_cvt_f32_f16_e32 v22, v22 -; GFX7-NEXT: v_cvt_f32_f16_e32 v23, v23 -; GFX7-NEXT: v_cvt_f16_f32_e32 v24, v24 -; GFX7-NEXT: v_cvt_f16_f32_e32 v25, v25 -; GFX7-NEXT: v_cvt_f16_f32_e32 v26, v26 -; GFX7-NEXT: v_cvt_f16_f32_e32 v27, v27 -; GFX7-NEXT: v_cvt_f32_f16_e32 v24, v24 -; GFX7-NEXT: v_cvt_f32_f16_e32 v25, v25 -; GFX7-NEXT: v_cvt_f32_f16_e32 v26, v26 -; GFX7-NEXT: v_cvt_f32_f16_e32 v27, v27 -; GFX7-NEXT: v_cvt_f16_f32_e32 v28, v28 -; GFX7-NEXT: v_cvt_f16_f32_e32 v29, v29 -; GFX7-NEXT: v_cvt_f16_f32_e32 v30, v30 -; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128 -; GFX7-NEXT: v_cvt_f32_f16_e32 v28, v28 -; GFX7-NEXT: v_cvt_f32_f16_e32 v29, v29 -; GFX7-NEXT: v_cvt_f32_f16_e32 v30, v30 -; GFX7-NEXT: s_waitcnt vmcnt(1) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 -; GFX7-NEXT: s_waitcnt vmcnt(1) -; GFX7-NEXT: v_cvt_f16_f32_e32 v32, v32 -; GFX7-NEXT: v_cvt_f32_f16_e32 v32, v32 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v3, v3, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: 
v_min_f32_e32 v4, v4, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v5, v5, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v6, v6, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v7, v7, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v8, v8, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v9, v9, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v10, v10, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v11, v11, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v12, v12, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v13, 
v13, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v14, v14, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v15, v15, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v16, v16, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v17, v17, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v18, v18, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v19, v19, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v20, v20, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v21, v21, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v22, v22, v31 
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v23, v23, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v24, v24, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v25, v25, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v26, v26, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v27, v27, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v28, v28, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v29, v29, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v30, v30, v31 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v31, v31 -; GFX7-NEXT: v_cvt_f32_f16_e32 v31, v31 -; GFX7-NEXT: v_min_f32_e32 v31, v31, v32 -; GFX7-NEXT: 
s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v32f16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX8-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v41, v6, v6 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v17, v17, v17 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_e32 v1, v1, v17 -; GFX8-NEXT: v_min_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX8-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v40, off, s[0:3], s32 
offset:48 ; 4-byte Folded Reload -; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX8-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v15, v15, v15 -; GFX8-NEXT: v_min_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_max_f16_e32 v30, v30, v30 -; GFX8-NEXT: v_max_f16_e32 v14, v14, v14 -; GFX8-NEXT: v_max_f16_e32 v29, v29, v29 -; GFX8-NEXT: v_max_f16_e32 v13, v13, v13 -; GFX8-NEXT: v_max_f16_e32 v28, v28, v28 -; GFX8-NEXT: v_max_f16_e32 v12, v12, v12 -; GFX8-NEXT: v_max_f16_e32 v27, v27, v27 -; GFX8-NEXT: v_max_f16_e32 v11, v11, v11 -; GFX8-NEXT: v_max_f16_e32 v26, v26, v26 -; GFX8-NEXT: v_max_f16_e32 v10, v10, v10 -; GFX8-NEXT: 
v_max_f16_e32 v25, v25, v25 -; GFX8-NEXT: v_max_f16_e32 v9, v9, v9 -; GFX8-NEXT: v_max_f16_e32 v24, v24, v24 -; GFX8-NEXT: v_max_f16_e32 v8, v8, v8 -; GFX8-NEXT: v_max_f16_e32 v23, v23, v23 -; GFX8-NEXT: v_max_f16_e32 v7, v7, v7 -; GFX8-NEXT: v_max_f16_e32 v22, v22, v22 -; GFX8-NEXT: v_max_f16_e32 v6, v6, v6 -; GFX8-NEXT: v_max_f16_e32 v21, v21, v21 -; GFX8-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX8-NEXT: v_max_f16_e32 v20, v20, v20 -; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX8-NEXT: v_max_f16_e32 v19, v19, v19 -; GFX8-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX8-NEXT: v_max_f16_e32 v18, v18, v18 -; GFX8-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX8-NEXT: v_max_f16_e32 v16, v16, v16 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_e32 v14, v14, v30 -; GFX8-NEXT: v_min_f16_e32 v13, v13, v29 -; GFX8-NEXT: v_min_f16_e32 v12, v12, v28 -; GFX8-NEXT: v_min_f16_e32 v11, v11, v27 -; GFX8-NEXT: v_min_f16_e32 v10, v10, v26 -; GFX8-NEXT: v_min_f16_e32 v9, v9, v25 -; GFX8-NEXT: v_min_f16_e32 v8, v8, v24 -; GFX8-NEXT: v_min_f16_e32 v7, v7, v23 -; GFX8-NEXT: v_min_f16_e32 v6, v6, v22 -; GFX8-NEXT: v_min_f16_e32 v5, v5, v21 -; GFX8-NEXT: v_min_f16_e32 v4, v4, v20 -; GFX8-NEXT: v_min_f16_e32 v3, v3, v19 -; GFX8-NEXT: v_min_f16_e32 v2, v2, v18 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v16 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v33 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v55 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v54 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v53 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v52 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v51 -; GFX8-NEXT: v_or_b32_e32 v7, v7, v50 -; GFX8-NEXT: v_or_b32_e32 v8, v8, v49 -; GFX8-NEXT: v_or_b32_e32 v9, v9, v48 -; GFX8-NEXT: v_or_b32_e32 v10, v10, v39 -; GFX8-NEXT: v_or_b32_e32 v11, v11, v38 -; GFX8-NEXT: v_or_b32_e32 v12, v12, v36 -; GFX8-NEXT: v_or_b32_e32 v13, v13, v34 -; GFX8-NEXT: v_or_b32_e32 v14, v14, v32 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: 
v_max_f16_e32 v31, v31, v31 -; GFX8-NEXT: v_min_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_e32 v15, v15, v31 -; GFX8-NEXT: v_or_b32_e32 v15, v15, v35 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v32f16: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_min_f16 v0, v0, v16 -; GFX900-NEXT: v_pk_max_f16 v16, v17, v17 -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_min_f16 v1, v1, v16 -; GFX900-NEXT: v_pk_max_f16 v16, v18, v18 -; GFX900-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX900-NEXT: v_pk_min_f16 v2, v2, v16 -; GFX900-NEXT: v_pk_max_f16 v16, v19, v19 -; GFX900-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX900-NEXT: v_pk_min_f16 v3, v3, v16 -; GFX900-NEXT: buffer_load_dword v16, off, s[0:3], s32 -; GFX900-NEXT: v_pk_max_f16 v17, v20, v20 -; GFX900-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX900-NEXT: v_pk_max_f16 v18, v21, v21 -; GFX900-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX900-NEXT: v_pk_max_f16 v19, v22, v22 -; GFX900-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX900-NEXT: v_pk_max_f16 v20, v23, v23 -; GFX900-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX900-NEXT: v_pk_max_f16 v21, v24, v24 -; GFX900-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX900-NEXT: v_pk_max_f16 v22, v25, v25 -; GFX900-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX900-NEXT: v_pk_max_f16 v23, v26, v26 -; GFX900-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX900-NEXT: v_pk_max_f16 v24, v27, v27 -; GFX900-NEXT: v_pk_max_f16 v11, v11, v11 -; GFX900-NEXT: v_pk_max_f16 v25, v28, v28 -; GFX900-NEXT: v_pk_max_f16 v12, v12, v12 -; GFX900-NEXT: v_pk_max_f16 v26, v29, v29 -; GFX900-NEXT: v_pk_max_f16 v13, v13, v13 -; GFX900-NEXT: v_pk_max_f16 v27, v30, v30 -; GFX900-NEXT: v_pk_max_f16 v14, v14, v14 -; GFX900-NEXT: v_pk_max_f16 v15, v15, v15 -; GFX900-NEXT: v_pk_min_f16 v4, v4, v17 -; GFX900-NEXT: v_pk_min_f16 v5, v5, v18 -; GFX900-NEXT: 
v_pk_min_f16 v6, v6, v19 -; GFX900-NEXT: v_pk_min_f16 v7, v7, v20 -; GFX900-NEXT: v_pk_min_f16 v8, v8, v21 -; GFX900-NEXT: v_pk_min_f16 v9, v9, v22 -; GFX900-NEXT: v_pk_min_f16 v10, v10, v23 -; GFX900-NEXT: v_pk_min_f16 v11, v11, v24 -; GFX900-NEXT: v_pk_min_f16 v12, v12, v25 -; GFX900-NEXT: v_pk_min_f16 v13, v13, v26 -; GFX900-NEXT: v_pk_min_f16 v14, v14, v27 -; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX900-NEXT: v_pk_min_f16 v15, v15, v16 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v32f16: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: scratch_load_dword v31, off, s32 -; GFX950-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: v_pk_max_f16 v17, v17, v17 -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v18, v18, v18 -; GFX950-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX950-NEXT: v_pk_max_f16 v19, v19, v19 -; GFX950-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX950-NEXT: v_pk_max_f16 v20, v20, v20 -; GFX950-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX950-NEXT: v_pk_max_f16 v21, v21, v21 -; GFX950-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX950-NEXT: v_pk_max_f16 v22, v22, v22 -; GFX950-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX950-NEXT: v_pk_max_f16 v23, v23, v23 -; GFX950-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX950-NEXT: v_pk_max_f16 v24, v24, v24 -; GFX950-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX950-NEXT: v_pk_max_f16 v25, v25, v25 -; GFX950-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX950-NEXT: v_pk_max_f16 v26, v26, v26 -; GFX950-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX950-NEXT: v_pk_max_f16 v27, v27, v27 -; GFX950-NEXT: v_pk_max_f16 v11, v11, v11 -; GFX950-NEXT: v_pk_max_f16 v28, v28, v28 -; GFX950-NEXT: v_pk_max_f16 v12, v12, v12 -; GFX950-NEXT: v_pk_max_f16 v29, v29, v29 -; GFX950-NEXT: v_pk_max_f16 v13, v13, v13 -; GFX950-NEXT: v_pk_max_f16 v30, v30, v30 -; GFX950-NEXT: v_pk_max_f16 v14, v14, v14 -; GFX950-NEXT: v_pk_max_f16 v15, v15, v15 -; 
GFX950-NEXT: v_pk_min_f16 v0, v0, v16 -; GFX950-NEXT: v_pk_min_f16 v1, v1, v17 -; GFX950-NEXT: v_pk_min_f16 v2, v2, v18 -; GFX950-NEXT: v_pk_min_f16 v3, v3, v19 -; GFX950-NEXT: v_pk_min_f16 v4, v4, v20 -; GFX950-NEXT: v_pk_min_f16 v5, v5, v21 -; GFX950-NEXT: v_pk_min_f16 v6, v6, v22 -; GFX950-NEXT: v_pk_min_f16 v7, v7, v23 -; GFX950-NEXT: v_pk_min_f16 v8, v8, v24 -; GFX950-NEXT: v_pk_min_f16 v9, v9, v25 -; GFX950-NEXT: v_pk_min_f16 v10, v10, v26 -; GFX950-NEXT: v_pk_min_f16 v11, v11, v27 -; GFX950-NEXT: v_pk_min_f16 v12, v12, v28 -; GFX950-NEXT: v_pk_min_f16 v13, v13, v29 -; GFX950-NEXT: v_pk_min_f16 v14, v14, v30 -; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v16, v31, v31 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_min_f16 v15, v15, v16 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v32f16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX10-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_max_f16 v17, v17, v17 -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v18, v18, v18 -; GFX10-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX10-NEXT: v_pk_max_f16 v19, v19, v19 -; GFX10-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX10-NEXT: v_pk_max_f16 v20, v20, v20 -; GFX10-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX10-NEXT: v_pk_max_f16 v21, v21, v21 -; GFX10-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX10-NEXT: v_pk_max_f16 v22, v22, v22 -; GFX10-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX10-NEXT: v_pk_max_f16 v23, v23, v23 -; GFX10-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX10-NEXT: v_pk_max_f16 v24, v24, v24 -; GFX10-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX10-NEXT: v_pk_max_f16 v25, v25, v25 -; GFX10-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX10-NEXT: v_pk_max_f16 v26, v26, v26 -; GFX10-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX10-NEXT: v_pk_max_f16 v27, v27, v27 -; GFX10-NEXT: v_pk_max_f16 v11, v11, v11 -; GFX10-NEXT: v_pk_max_f16 v28, v28, v28 -; 
GFX10-NEXT: v_pk_max_f16 v12, v12, v12 -; GFX10-NEXT: v_pk_max_f16 v29, v29, v29 -; GFX10-NEXT: v_pk_max_f16 v13, v13, v13 -; GFX10-NEXT: v_pk_max_f16 v30, v30, v30 -; GFX10-NEXT: v_pk_max_f16 v14, v14, v14 -; GFX10-NEXT: v_pk_max_f16 v15, v15, v15 -; GFX10-NEXT: v_pk_min_f16 v0, v0, v16 -; GFX10-NEXT: v_pk_min_f16 v1, v1, v17 -; GFX10-NEXT: v_pk_min_f16 v2, v2, v18 -; GFX10-NEXT: v_pk_min_f16 v3, v3, v19 -; GFX10-NEXT: v_pk_min_f16 v4, v4, v20 -; GFX10-NEXT: v_pk_min_f16 v5, v5, v21 -; GFX10-NEXT: v_pk_min_f16 v6, v6, v22 -; GFX10-NEXT: v_pk_min_f16 v7, v7, v23 -; GFX10-NEXT: v_pk_min_f16 v8, v8, v24 -; GFX10-NEXT: v_pk_min_f16 v9, v9, v25 -; GFX10-NEXT: v_pk_min_f16 v10, v10, v26 -; GFX10-NEXT: v_pk_min_f16 v11, v11, v27 -; GFX10-NEXT: v_pk_min_f16 v12, v12, v28 -; GFX10-NEXT: v_pk_min_f16 v13, v13, v29 -; GFX10-NEXT: v_pk_min_f16 v14, v14, v30 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v16, v31, v31 -; GFX10-NEXT: v_pk_min_f16 v15, v15, v16 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v32f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-NEXT: v_pk_max_f16 v16, v16, v16 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: v_pk_max_f16 v17, v17, v17 -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v18, v18, v18 -; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX11-NEXT: v_pk_max_f16 v19, v19, v19 -; GFX11-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX11-NEXT: v_pk_max_f16 v20, v20, v20 -; GFX11-NEXT: v_pk_max_f16 v4, v4, v4 -; GFX11-NEXT: v_pk_max_f16 v21, v21, v21 -; GFX11-NEXT: v_pk_max_f16 v5, v5, v5 -; GFX11-NEXT: v_pk_max_f16 v22, v22, v22 -; GFX11-NEXT: v_pk_max_f16 v6, v6, v6 -; GFX11-NEXT: v_pk_max_f16 v23, v23, v23 -; GFX11-NEXT: v_pk_max_f16 v7, v7, v7 -; GFX11-NEXT: v_pk_max_f16 v24, v24, v24 -; GFX11-NEXT: v_pk_max_f16 v8, v8, v8 -; GFX11-NEXT: v_pk_max_f16 v25, v25, v25 -; GFX11-NEXT: v_pk_max_f16 v9, v9, v9 -; GFX11-NEXT: 
v_pk_max_f16 v26, v26, v26 -; GFX11-NEXT: v_pk_max_f16 v10, v10, v10 -; GFX11-NEXT: v_pk_max_f16 v27, v27, v27 -; GFX11-NEXT: v_pk_max_f16 v11, v11, v11 -; GFX11-NEXT: v_pk_max_f16 v28, v28, v28 -; GFX11-NEXT: v_pk_max_f16 v12, v12, v12 -; GFX11-NEXT: v_pk_max_f16 v29, v29, v29 -; GFX11-NEXT: v_pk_max_f16 v13, v13, v13 -; GFX11-NEXT: v_pk_max_f16 v30, v30, v30 -; GFX11-NEXT: v_pk_max_f16 v14, v14, v14 -; GFX11-NEXT: v_pk_max_f16 v15, v15, v15 -; GFX11-NEXT: v_pk_min_f16 v0, v0, v16 -; GFX11-NEXT: v_pk_min_f16 v1, v1, v17 -; GFX11-NEXT: v_pk_min_f16 v2, v2, v18 -; GFX11-NEXT: v_pk_min_f16 v3, v3, v19 -; GFX11-NEXT: v_pk_min_f16 v4, v4, v20 -; GFX11-NEXT: v_pk_min_f16 v5, v5, v21 -; GFX11-NEXT: v_pk_min_f16 v6, v6, v22 -; GFX11-NEXT: v_pk_min_f16 v7, v7, v23 -; GFX11-NEXT: v_pk_min_f16 v8, v8, v24 -; GFX11-NEXT: v_pk_min_f16 v9, v9, v25 -; GFX11-NEXT: v_pk_min_f16 v10, v10, v26 -; GFX11-NEXT: v_pk_min_f16 v11, v11, v27 -; GFX11-NEXT: v_pk_min_f16 v12, v12, v28 -; GFX11-NEXT: v_pk_min_f16 v13, v13, v29 -; GFX11-NEXT: v_pk_min_f16 v14, v14, v30 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v16, v31, v31 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_pk_min_f16 v15, v15, v16 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v32f16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: scratch_load_b32 v31, off, s32 -; GFX12-NEXT: v_pk_max_num_f16 v16, v16, v16 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: v_pk_max_num_f16 v17, v17, v17 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v18, v18, v18 -; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 -; GFX12-NEXT: v_pk_max_num_f16 v19, v19, v19 -; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 -; GFX12-NEXT: v_pk_max_num_f16 v20, v20, v20 -; GFX12-NEXT: v_pk_max_num_f16 v4, v4, v4 -; GFX12-NEXT: 
v_pk_max_num_f16 v21, v21, v21 -; GFX12-NEXT: v_pk_max_num_f16 v5, v5, v5 -; GFX12-NEXT: v_pk_max_num_f16 v22, v22, v22 -; GFX12-NEXT: v_pk_max_num_f16 v6, v6, v6 -; GFX12-NEXT: v_pk_max_num_f16 v23, v23, v23 -; GFX12-NEXT: v_pk_max_num_f16 v7, v7, v7 -; GFX12-NEXT: v_pk_max_num_f16 v24, v24, v24 -; GFX12-NEXT: v_pk_max_num_f16 v8, v8, v8 -; GFX12-NEXT: v_pk_max_num_f16 v25, v25, v25 -; GFX12-NEXT: v_pk_max_num_f16 v9, v9, v9 -; GFX12-NEXT: v_pk_max_num_f16 v26, v26, v26 -; GFX12-NEXT: v_pk_max_num_f16 v10, v10, v10 -; GFX12-NEXT: v_pk_max_num_f16 v27, v27, v27 -; GFX12-NEXT: v_pk_max_num_f16 v11, v11, v11 -; GFX12-NEXT: v_pk_max_num_f16 v28, v28, v28 -; GFX12-NEXT: v_pk_max_num_f16 v12, v12, v12 -; GFX12-NEXT: v_pk_max_num_f16 v29, v29, v29 -; GFX12-NEXT: v_pk_max_num_f16 v13, v13, v13 -; GFX12-NEXT: v_pk_max_num_f16 v30, v30, v30 -; GFX12-NEXT: v_pk_max_num_f16 v14, v14, v14 -; GFX12-NEXT: v_pk_max_num_f16 v15, v15, v15 -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v16 -; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v17 -; GFX12-NEXT: v_pk_min_num_f16 v2, v2, v18 -; GFX12-NEXT: v_pk_min_num_f16 v3, v3, v19 -; GFX12-NEXT: v_pk_min_num_f16 v4, v4, v20 -; GFX12-NEXT: v_pk_min_num_f16 v5, v5, v21 -; GFX12-NEXT: v_pk_min_num_f16 v6, v6, v22 -; GFX12-NEXT: v_pk_min_num_f16 v7, v7, v23 -; GFX12-NEXT: v_pk_min_num_f16 v8, v8, v24 -; GFX12-NEXT: v_pk_min_num_f16 v9, v9, v25 -; GFX12-NEXT: v_pk_min_num_f16 v10, v10, v26 -; GFX12-NEXT: v_pk_min_num_f16 v11, v11, v27 -; GFX12-NEXT: v_pk_min_num_f16 v12, v12, v28 -; GFX12-NEXT: v_pk_min_num_f16 v13, v13, v29 -; GFX12-NEXT: v_pk_min_num_f16 v14, v14, v30 -; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v16, v31, v31 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_pk_min_num_f16 v15, v15, v16 -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call <32 x half> @llvm.minimumnum.v32f16(<32 x half> %x, <32 x half> %y) - ret <32 x half> %result -} - -define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> 
%y) { -; GFX7-LABEL: v_minimumnum_v2f32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v3 -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v2f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v3 -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_min_f32_e32 v1, v1, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_v2f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v2f32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX10-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v2f32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 -; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v2f32: -; GFX12: ; 
%bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 -; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 -; GFX12-NEXT: s_setpc_b64 s[30:31] +define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) { +; GFX7-SDAG-LABEL: v_minimumnum_v16f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v17 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v20 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v18 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v21 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v19 +; 
GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v22 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v23 +; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v18 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v16 +; GFX7-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v19 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v24 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v25 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v26 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v20 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v27 +; GFX7-SDAG-NEXT: v_min_f32_e32 v8, v8, v17 +; GFX7-SDAG-NEXT: v_min_f32_e32 v9, v9, v18 +; GFX7-SDAG-NEXT: v_min_f32_e32 v10, v10, v19 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v28 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v29 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v30 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-SDAG-NEXT: 
v_cvt_f32_f16_e32 v19, v19 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-SDAG-NEXT: v_min_f32_e32 v11, v11, v20 +; GFX7-SDAG-NEXT: v_min_f32_e32 v12, v12, v17 +; GFX7-SDAG-NEXT: v_min_f32_e32 v13, v13, v18 +; GFX7-SDAG-NEXT: v_min_f32_e32 v14, v14, v19 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_min_f32_e32 v15, v15, v16 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v16f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v17 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v20 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v18 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v21 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v17 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v19 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v22 +; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v18 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v16 +; GFX7-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v19 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v23 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v24 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v25 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, 
v26 +; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v17 +; GFX7-GISEL-NEXT: v_min_f32_e32 v8, v8, v18 +; GFX7-GISEL-NEXT: v_min_f32_e32 v9, v9, v19 +; GFX7-GISEL-NEXT: v_min_f32_e32 v10, v10, v20 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v27 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v28 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v29 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v30 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-GISEL-NEXT: v_min_f32_e32 v11, v11, v17 +; GFX7-GISEL-NEXT: v_min_f32_e32 v12, v12, v18 +; GFX7-GISEL-NEXT: v_min_f32_e32 v13, v13, v19 +; GFX7-GISEL-NEXT: v_min_f32_e32 v14, v14, v20 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-GISEL-NEXT: v_min_f32_e32 v15, v15, v16 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v16f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v7, v7 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15 +; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 +; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14 +; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 +; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_e32 v7, v7, v15 +; GFX8-SDAG-NEXT: v_min_f16_e32 v6, v6, v14 +; GFX8-SDAG-NEXT: v_min_f16_e32 v5, v5, v13 +; GFX8-SDAG-NEXT: v_min_f16_e32 v4, v4, v12 +; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v11 +; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v10 +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v9 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v8 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v23 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v22 +; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v21 +; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v20 +; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v19 +; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v18 +; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v17 +; 
GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v16 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v16f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v8, v8 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v9, v9 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v10, v10 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v16, v16, v19 +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v11, v11 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v8, v17, v8 +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v4, v4 +; GFX8-GISEL-NEXT: 
v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v12, v12 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v9, v18, v9 +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v5, v5 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v4, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v13, v13 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v10, v19, v10 +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v6, v6 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v11, v17, v11 +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v7, v7 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v12, v18, v12 +; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v14, v14 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v15, v15 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v18, v19, v18 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v6, v6, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_min_f16_e32 v13, v17, v13 +; GFX8-GISEL-NEXT: 
v_min_f16_sdwa v7, v7, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v16, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v8, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v9, v2 +; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v10, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v11, v4 +; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v12, v5 +; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v18, v6 +; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v13, v7 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v16f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v9, v9 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v10, v10 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v12, v12 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX900-SDAG-NEXT: v_pk_min_f16 v4, v4, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v13, v13 +; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX900-SDAG-NEXT: v_pk_min_f16 v5, v5, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v14, v14 +; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX900-SDAG-NEXT: v_pk_min_f16 v6, v6, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v15, v15 +; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX900-SDAG-NEXT: v_pk_min_f16 v7, v7, v8 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_v16f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-GISEL-NEXT: 
v_pk_max_f16 v2, v2, v2 +; GFX9-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX9-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX9-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX9-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX9-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX9-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX9-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX9-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX9-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX9-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v8 +; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v9 +; GFX9-GISEL-NEXT: v_pk_min_f16 v2, v2, v10 +; GFX9-GISEL-NEXT: v_pk_min_f16 v3, v3, v11 +; GFX9-GISEL-NEXT: v_pk_min_f16 v4, v4, v12 +; GFX9-GISEL-NEXT: v_pk_min_f16 v5, v5, v13 +; GFX9-GISEL-NEXT: v_pk_min_f16 v6, v6, v14 +; GFX9-GISEL-NEXT: v_pk_min_f16 v7, v7, v15 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v16f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v9, v9 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v10, v10 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v12, v12 +; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX950-SDAG-NEXT: v_pk_min_f16 v4, v4, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v13, v13 +; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX950-SDAG-NEXT: v_pk_min_f16 
v5, v5, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v14, v14 +; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX950-SDAG-NEXT: v_pk_min_f16 v6, v6, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v15, v15 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_min_f16 v7, v7, v8 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v16f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v8 +; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 +; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v9 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v10 +; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v12, v12 +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v13, v13 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v14, v14 +; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v15, v15 +; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v8 +; GFX10-SDAG-NEXT: v_pk_min_f16 v4, v4, v9 +; GFX10-SDAG-NEXT: v_pk_min_f16 v5, v5, v10 +; GFX10-SDAG-NEXT: v_pk_min_f16 v6, v6, v11 +; GFX10-SDAG-NEXT: v_pk_min_f16 v7, v7, v12 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v16f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-GISEL-NEXT: v_pk_max_f16 v6, 
v6, v6 +; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v8 +; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v9 +; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v10 +; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v11 +; GFX10-GISEL-NEXT: v_pk_min_f16 v4, v4, v12 +; GFX10-GISEL-NEXT: v_pk_min_f16 v5, v5, v13 +; GFX10-GISEL-NEXT: v_pk_min_f16 v6, v6, v14 +; GFX10-GISEL-NEXT: v_pk_min_f16 v7, v7, v15 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v16f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v8 +; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 +; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v9 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v10 +; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v12, v12 +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v13, v13 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v14, v14 +; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v15, v15 +; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v8 +; GFX11-SDAG-NEXT: v_pk_min_f16 v4, v4, v9 +; GFX11-SDAG-NEXT: v_pk_min_f16 v5, v5, v10 +; GFX11-SDAG-NEXT: v_pk_min_f16 v6, v6, v11 
+; GFX11-SDAG-NEXT: v_pk_min_f16 v7, v7, v12 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v16f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v8 +; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v9 +; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v10 +; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v11 +; GFX11-GISEL-NEXT: v_pk_min_f16 v4, v4, v12 +; GFX11-GISEL-NEXT: v_pk_min_f16 v5, v5, v13 +; GFX11-GISEL-NEXT: v_pk_min_f16 v6, v6, v14 +; GFX11-GISEL-NEXT: v_pk_min_f16 v7, v7, v15 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v16f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v8 +; GFX12-SDAG-NEXT: 
v_pk_max_num_f16 v8, v11, v11 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v9 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v10 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v8 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v9 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v10 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v11 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v12 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v16f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v8 +; 
GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v9 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v10 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v11 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v12 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v13 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v14 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v15 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %x, <16 x half> %y) + ret <16 x half> %result +} + +define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) { +; GFX7-SDAG-LABEL: v_minimumnum_v32f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-SDAG-NEXT: 
v_cvt_f16_f32_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v19 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v20 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v21, v21 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v22, v22 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v23, v23 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v21, v21 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v22, v22 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v23, v23 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v24, v24 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v25, v25 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v26, v26 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v27, v27 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v24, v24 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v25, v25 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v26, v26 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v27, v27 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v28, v28 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v29, v29 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v30, v30 +; GFX7-SDAG-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v28, v28 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v29, v29 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v30, v30 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v32, v32 +; 
GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v32, v32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v8, v8, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v9, v9, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v10, v10, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v11, v11, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v12, v12, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v13, v13, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v14, v14, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v15, v15, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v16, v16, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 +; GFX7-SDAG-NEXT: s_waitcnt 
vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v17, v17, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v18, v18, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v19, v19, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v20, v20, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v21, v21, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v22, v22, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v23, v23, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v24, v24, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: 
v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v25, v25, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v26, v26, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v27, v27, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v28, v28, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v29, v29, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v30, v30, v31 +; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-SDAG-NEXT: v_min_f32_e32 v31, v31, v32 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v32f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: 
v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v17 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v18 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v19 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v20 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v21, v21 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v22, v22 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v23, v23 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v24, v24 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v25, v25 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v26, v26 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v27, v27 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v28, v28 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v29, v29 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v30, v30 +; GFX7-GISEL-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v32, v32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: 
v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v8, v8, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v9, v9, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, 
v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v10, v10, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v11, v11, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v12, v12, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v13, v13, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v14, v14, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v15, v15, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v16, v16, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v16, v16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v17, v17, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v17, v17 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: 
v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v18, v18, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v18, v18 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v19, v19, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v19, v19 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v20, v20, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v20, v20 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v21, v21, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v21, v21 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v22, v22, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v22, v22 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v23, v23, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v23, v23 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v24, v24, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v24, v24 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v25, v25, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v25, v25 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; 
GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v26, v26, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v26, v26 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v27, v27, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v27, v27 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v28, v28, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v28, v28 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v29, v29, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v29, v29 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v30, v30, v31 +; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v30, v30 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 +; GFX7-GISEL-NEXT: v_min_f32_e32 v31, v31, v32 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v31, v31 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v32f16: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 
4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX8-SDAG-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v17, v17, v17 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v17 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v49, v53, v52 dst_sel:WORD_1 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v40 +; GFX8-SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX8-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v14, v14 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_max_f16_e32 v30, v30, v30 +; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14 +; GFX8-SDAG-NEXT: v_max_f16_e32 v29, v29, v29 +; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13 +; GFX8-SDAG-NEXT: v_max_f16_e32 v28, v28, v28 +; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12 +; GFX8-SDAG-NEXT: v_max_f16_e32 v27, v27, v27 +; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11 +; GFX8-SDAG-NEXT: v_max_f16_e32 v26, v26, v26 +; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10 +; GFX8-SDAG-NEXT: v_max_f16_e32 v25, v25, v25 +; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9 +; GFX8-SDAG-NEXT: v_max_f16_e32 v24, v24, v24 +; 
GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8 +; GFX8-SDAG-NEXT: v_max_f16_e32 v23, v23, v23 +; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 +; GFX8-SDAG-NEXT: v_max_f16_e32 v22, v22, v22 +; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 +; GFX8-SDAG-NEXT: v_max_f16_e32 v21, v21, v21 +; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX8-SDAG-NEXT: v_max_f16_e32 v20, v20, v20 +; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-SDAG-NEXT: v_max_f16_e32 v19, v19, v19 +; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX8-SDAG-NEXT: v_max_f16_e32 v18, v18, v18 +; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX8-SDAG-NEXT: v_max_f16_e32 v16, v16, v16 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_e32 v14, v14, v30 +; GFX8-SDAG-NEXT: v_min_f16_e32 v13, v13, v29 +; GFX8-SDAG-NEXT: v_min_f16_e32 v12, v12, v28 +; GFX8-SDAG-NEXT: v_min_f16_e32 v11, v11, v27 +; GFX8-SDAG-NEXT: v_min_f16_e32 v10, v10, v26 +; GFX8-SDAG-NEXT: v_min_f16_e32 v9, v9, v25 +; GFX8-SDAG-NEXT: v_min_f16_e32 v8, v8, v24 +; GFX8-SDAG-NEXT: v_min_f16_e32 v7, v7, v23 +; GFX8-SDAG-NEXT: v_min_f16_e32 v6, v6, v22 +; GFX8-SDAG-NEXT: v_min_f16_e32 v5, v5, v21 +; GFX8-SDAG-NEXT: v_min_f16_e32 v4, v4, v20 +; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v19 +; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v18 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v16 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v33 +; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v55 +; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v54 +; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v53 +; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v52 +; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v51 +; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v50 +; GFX8-SDAG-NEXT: v_or_b32_e32 v8, v8, v49 +; GFX8-SDAG-NEXT: v_or_b32_e32 v9, v9, v48 +; GFX8-SDAG-NEXT: v_or_b32_e32 v10, v10, v39 +; GFX8-SDAG-NEXT: v_or_b32_e32 v11, v11, v38 +; GFX8-SDAG-NEXT: v_or_b32_e32 v12, v12, v36 +; GFX8-SDAG-NEXT: v_or_b32_e32 v13, v13, v34 +; GFX8-SDAG-NEXT: v_or_b32_e32 v14, v14, v32 +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) +; 
GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v31, v31, v31 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_e32 v15, v15, v31 +; GFX8-SDAG-NEXT: v_or_b32_e32 v15, v15, v35 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v32f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v31, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v16, v16 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v16, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v31, v31, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v17, v17 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v17, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v16, v16, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v2, v2 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v18, v18 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v18, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v17, v17, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 
+; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v3, v3 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v19, v19 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v19, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v18, v18, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v4, v4 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v20, v20 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v20, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v19, v19, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v4, v4, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v20, v5, v5 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v21, v21 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v21, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v20, v20, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v5, v5, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v21, v6, v6 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v22, v22 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v22, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v21, v21, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v6, v6, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v22, v7, v7 +; 
GFX8-GISEL-NEXT: v_max_f16_e32 v32, v23, v23 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v23, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v22, v22, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v7, v7, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v23, v8, v8 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v24, v24 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v24, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v23, v23, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v8, v8, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v24, v9, v9 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v25, v25 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v25, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v24, v24, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v9, v9, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v25, v10, v10 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v26, v26 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v26, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v25, v25, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v10, v10, v26 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v26, v11, v11 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v27, 
v27 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v27, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v26, v26, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v11, v11, v27 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v27, v12, v12 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v28, v28 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v28, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v27, v27, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v12, v12, v28 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v28, v13, v13 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v29, v29 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v29, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v28, v28, v32 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v13, v13, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_max_f16_e32 v29, v14, v14 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v30, v30 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v14, v14, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: buffer_load_dword v30, off, s[0:3], s32 +; GFX8-GISEL-NEXT: v_min_f16_e32 v29, v29, v32 +; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v15, v15 +; 
GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v31, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v16, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v17, v2 +; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v18, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v19, v4 +; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v20, v5 +; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v21, v6 +; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v22, v7 +; GFX8-GISEL-NEXT: v_or_b32_e32 v8, v23, v8 +; GFX8-GISEL-NEXT: v_or_b32_e32 v9, v24, v9 +; GFX8-GISEL-NEXT: v_or_b32_e32 v10, v25, v10 +; GFX8-GISEL-NEXT: v_or_b32_e32 v11, v26, v11 +; GFX8-GISEL-NEXT: v_or_b32_e32 v12, v27, v12 +; GFX8-GISEL-NEXT: v_or_b32_e32 v13, v28, v13 +; GFX8-GISEL-NEXT: v_or_b32_e32 v14, v29, v14 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v33, v30, v30 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v32, v32, v33 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v15, v15, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v15, v32, v15 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v32f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v17, v17 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v18, v18 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v16 +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v19, v19 +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v16 +; GFX900-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], 
s32 +; GFX900-SDAG-NEXT: v_pk_max_f16 v17, v20, v20 +; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX900-SDAG-NEXT: v_pk_max_f16 v18, v21, v21 +; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX900-SDAG-NEXT: v_pk_max_f16 v19, v22, v22 +; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX900-SDAG-NEXT: v_pk_max_f16 v20, v23, v23 +; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX900-SDAG-NEXT: v_pk_max_f16 v21, v24, v24 +; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX900-SDAG-NEXT: v_pk_max_f16 v22, v25, v25 +; GFX900-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX900-SDAG-NEXT: v_pk_max_f16 v23, v26, v26 +; GFX900-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX900-SDAG-NEXT: v_pk_max_f16 v24, v27, v27 +; GFX900-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX900-SDAG-NEXT: v_pk_max_f16 v25, v28, v28 +; GFX900-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX900-SDAG-NEXT: v_pk_max_f16 v26, v29, v29 +; GFX900-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX900-SDAG-NEXT: v_pk_max_f16 v27, v30, v30 +; GFX900-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX900-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX900-SDAG-NEXT: v_pk_min_f16 v4, v4, v17 +; GFX900-SDAG-NEXT: v_pk_min_f16 v5, v5, v18 +; GFX900-SDAG-NEXT: v_pk_min_f16 v6, v6, v19 +; GFX900-SDAG-NEXT: v_pk_min_f16 v7, v7, v20 +; GFX900-SDAG-NEXT: v_pk_min_f16 v8, v8, v21 +; GFX900-SDAG-NEXT: v_pk_min_f16 v9, v9, v22 +; GFX900-SDAG-NEXT: v_pk_min_f16 v10, v10, v23 +; GFX900-SDAG-NEXT: v_pk_min_f16 v11, v11, v24 +; GFX900-SDAG-NEXT: v_pk_min_f16 v12, v12, v25 +; GFX900-SDAG-NEXT: v_pk_min_f16 v13, v13, v26 +; GFX900-SDAG-NEXT: v_pk_min_f16 v14, v14, v27 +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX900-SDAG-NEXT: v_pk_min_f16 v15, v15, v16 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v32f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-GISEL-NEXT: v_pk_max_f16 
v16, v16, v16 +; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v16 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v17, v17 +; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v16 +; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v18, v18 +; GFX900-GISEL-NEXT: v_pk_min_f16 v2, v2, v16 +; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v19, v19 +; GFX900-GISEL-NEXT: v_pk_min_f16 v3, v3, v16 +; GFX900-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX900-GISEL-NEXT: v_pk_max_f16 v17, v20, v20 +; GFX900-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX900-GISEL-NEXT: v_pk_max_f16 v18, v21, v21 +; GFX900-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX900-GISEL-NEXT: v_pk_max_f16 v19, v22, v22 +; GFX900-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX900-GISEL-NEXT: v_pk_max_f16 v20, v23, v23 +; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX900-GISEL-NEXT: v_pk_max_f16 v21, v24, v24 +; GFX900-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX900-GISEL-NEXT: v_pk_max_f16 v22, v25, v25 +; GFX900-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX900-GISEL-NEXT: v_pk_max_f16 v23, v26, v26 +; GFX900-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX900-GISEL-NEXT: v_pk_max_f16 v24, v27, v27 +; GFX900-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX900-GISEL-NEXT: v_pk_max_f16 v25, v28, v28 +; GFX900-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX900-GISEL-NEXT: v_pk_max_f16 v26, v29, v29 +; GFX900-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX900-GISEL-NEXT: v_pk_max_f16 v27, v30, v30 +; GFX900-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX900-GISEL-NEXT: v_pk_min_f16 v4, v4, v17 +; GFX900-GISEL-NEXT: v_pk_min_f16 v5, v5, v18 +; GFX900-GISEL-NEXT: v_pk_min_f16 v6, v6, v19 +; GFX900-GISEL-NEXT: v_pk_min_f16 v7, v7, v20 +; GFX900-GISEL-NEXT: v_pk_min_f16 v8, v8, v21 +; GFX900-GISEL-NEXT: v_pk_min_f16 v9, v9, v22 +; GFX900-GISEL-NEXT: v_pk_min_f16 v10, v10, v23 +; 
GFX900-GISEL-NEXT: v_pk_min_f16 v11, v11, v24 +; GFX900-GISEL-NEXT: v_pk_min_f16 v12, v12, v25 +; GFX900-GISEL-NEXT: v_pk_min_f16 v13, v13, v26 +; GFX900-GISEL-NEXT: v_pk_min_f16 v14, v14, v27 +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX900-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v32f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: scratch_load_dword v31, off, s32 +; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX950-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX950-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX950-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX950-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX950-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX950-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX950-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX950-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX950-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX950-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX950-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX950-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX950-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX950-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX950-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX950-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX950-SDAG-NEXT: 
v_pk_max_f16 v15, v15, v15 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v16 +; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v17 +; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v18 +; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v19 +; GFX950-SDAG-NEXT: v_pk_min_f16 v4, v4, v20 +; GFX950-SDAG-NEXT: v_pk_min_f16 v5, v5, v21 +; GFX950-SDAG-NEXT: v_pk_min_f16 v6, v6, v22 +; GFX950-SDAG-NEXT: v_pk_min_f16 v7, v7, v23 +; GFX950-SDAG-NEXT: v_pk_min_f16 v8, v8, v24 +; GFX950-SDAG-NEXT: v_pk_min_f16 v9, v9, v25 +; GFX950-SDAG-NEXT: v_pk_min_f16 v10, v10, v26 +; GFX950-SDAG-NEXT: v_pk_min_f16 v11, v11, v27 +; GFX950-SDAG-NEXT: v_pk_min_f16 v12, v12, v28 +; GFX950-SDAG-NEXT: v_pk_min_f16 v13, v13, v29 +; GFX950-SDAG-NEXT: v_pk_min_f16 v14, v14, v30 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_min_f16 v15, v15, v16 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v32f16: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: scratch_load_dword v31, off, s32 +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX950-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX950-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX950-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX950-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX950-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX950-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX950-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX950-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX950-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX950-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX950-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX950-GISEL-NEXT: 
v_pk_max_f16 v18, v18, v18 +; GFX950-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX950-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX950-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX950-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX950-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX950-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX950-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX950-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX950-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX950-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX950-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX950-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v16 +; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v17 +; GFX950-GISEL-NEXT: v_pk_min_f16 v2, v2, v18 +; GFX950-GISEL-NEXT: v_pk_min_f16 v3, v3, v19 +; GFX950-GISEL-NEXT: v_pk_min_f16 v4, v4, v20 +; GFX950-GISEL-NEXT: v_pk_min_f16 v5, v5, v21 +; GFX950-GISEL-NEXT: v_pk_min_f16 v6, v6, v22 +; GFX950-GISEL-NEXT: v_pk_min_f16 v7, v7, v23 +; GFX950-GISEL-NEXT: v_pk_min_f16 v8, v8, v24 +; GFX950-GISEL-NEXT: v_pk_min_f16 v9, v9, v25 +; GFX950-GISEL-NEXT: v_pk_min_f16 v10, v10, v26 +; GFX950-GISEL-NEXT: v_pk_min_f16 v11, v11, v27 +; GFX950-GISEL-NEXT: v_pk_min_f16 v12, v12, v28 +; GFX950-GISEL-NEXT: v_pk_min_f16 v13, v13, v29 +; GFX950-GISEL-NEXT: v_pk_min_f16 v14, v14, v30 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX950-GISEL-NEXT: s_nop 0 +; GFX950-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v32f16: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX10-SDAG-NEXT: v_pk_max_f16 v2, 
v2, v2 +; GFX10-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX10-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX10-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX10-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX10-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX10-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX10-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX10-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX10-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX10-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v16 +; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v17 +; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v18 +; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v19 +; GFX10-SDAG-NEXT: v_pk_min_f16 v4, v4, v20 +; GFX10-SDAG-NEXT: v_pk_min_f16 v5, v5, v21 +; GFX10-SDAG-NEXT: v_pk_min_f16 v6, v6, v22 +; GFX10-SDAG-NEXT: v_pk_min_f16 v7, v7, v23 +; GFX10-SDAG-NEXT: v_pk_min_f16 v8, v8, v24 +; GFX10-SDAG-NEXT: v_pk_min_f16 v9, v9, v25 +; GFX10-SDAG-NEXT: v_pk_min_f16 v10, v10, v26 +; GFX10-SDAG-NEXT: v_pk_min_f16 v11, v11, v27 +; GFX10-SDAG-NEXT: v_pk_min_f16 v12, v12, v28 +; GFX10-SDAG-NEXT: v_pk_min_f16 v13, v13, v29 +; GFX10-SDAG-NEXT: v_pk_min_f16 v14, v14, v30 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX10-SDAG-NEXT: v_pk_min_f16 v15, v15, v16 +; GFX10-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v32f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX10-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX10-GISEL-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX10-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX10-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX10-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX10-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX10-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX10-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX10-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX10-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX10-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX10-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX10-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX10-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v16 +; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v17 +; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v18 +; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v19 +; GFX10-GISEL-NEXT: v_pk_min_f16 v4, v4, v20 +; GFX10-GISEL-NEXT: v_pk_min_f16 v5, v5, v21 +; GFX10-GISEL-NEXT: v_pk_min_f16 v6, v6, v22 +; 
GFX10-GISEL-NEXT: v_pk_min_f16 v7, v7, v23 +; GFX10-GISEL-NEXT: v_pk_min_f16 v8, v8, v24 +; GFX10-GISEL-NEXT: v_pk_min_f16 v9, v9, v25 +; GFX10-GISEL-NEXT: v_pk_min_f16 v10, v10, v26 +; GFX10-GISEL-NEXT: v_pk_min_f16 v11, v11, v27 +; GFX10-GISEL-NEXT: v_pk_min_f16 v12, v12, v28 +; GFX10-GISEL-NEXT: v_pk_min_f16 v13, v13, v29 +; GFX10-GISEL-NEXT: v_pk_min_f16 v14, v14, v30 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX10-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v32f16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX11-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX11-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX11-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX11-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX11-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX11-SDAG-NEXT: 
v_pk_max_f16 v13, v13, v13 +; GFX11-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX11-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX11-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v16 +; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v17 +; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v18 +; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v19 +; GFX11-SDAG-NEXT: v_pk_min_f16 v4, v4, v20 +; GFX11-SDAG-NEXT: v_pk_min_f16 v5, v5, v21 +; GFX11-SDAG-NEXT: v_pk_min_f16 v6, v6, v22 +; GFX11-SDAG-NEXT: v_pk_min_f16 v7, v7, v23 +; GFX11-SDAG-NEXT: v_pk_min_f16 v8, v8, v24 +; GFX11-SDAG-NEXT: v_pk_min_f16 v9, v9, v25 +; GFX11-SDAG-NEXT: v_pk_min_f16 v10, v10, v26 +; GFX11-SDAG-NEXT: v_pk_min_f16 v11, v11, v27 +; GFX11-SDAG-NEXT: v_pk_min_f16 v12, v12, v28 +; GFX11-SDAG-NEXT: v_pk_min_f16 v13, v13, v29 +; GFX11-SDAG-NEXT: v_pk_min_f16 v14, v14, v30 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_min_f16 v15, v15, v16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v32f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, 
v14 +; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX11-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX11-GISEL-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX11-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX11-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX11-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX11-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX11-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX11-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX11-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX11-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX11-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX11-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 +; GFX11-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX11-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v16 +; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v17 +; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v18 +; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v19 +; GFX11-GISEL-NEXT: v_pk_min_f16 v4, v4, v20 +; GFX11-GISEL-NEXT: v_pk_min_f16 v5, v5, v21 +; GFX11-GISEL-NEXT: v_pk_min_f16 v6, v6, v22 +; GFX11-GISEL-NEXT: v_pk_min_f16 v7, v7, v23 +; GFX11-GISEL-NEXT: v_pk_min_f16 v8, v8, v24 +; GFX11-GISEL-NEXT: v_pk_min_f16 v9, v9, v25 +; GFX11-GISEL-NEXT: v_pk_min_f16 v10, v10, v26 +; GFX11-GISEL-NEXT: v_pk_min_f16 v11, v11, v27 +; GFX11-GISEL-NEXT: v_pk_min_f16 v12, v12, v28 +; GFX11-GISEL-NEXT: v_pk_min_f16 v13, v13, v29 +; GFX11-GISEL-NEXT: v_pk_min_f16 v14, v14, v30 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v32f16: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: scratch_load_b32 v31, off, s32 +; GFX12-SDAG-NEXT: 
v_pk_max_num_f16 v16, v16, v16 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v16 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v17 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v18 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v19 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v20 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v21 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v22 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v23 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v8, v8, v24 +; GFX12-SDAG-NEXT: 
v_pk_min_num_f16 v9, v9, v25 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v10, v10, v26 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v11, v11, v27 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v12, v12, v28 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v13, v13, v29 +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v14, v14, v30 +; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v15, v15, v16 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v32f16: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: scratch_load_b32 v31, off, s32 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v17, v17, v17 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 
v22, v22, v22 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v16 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v17 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v18 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v19 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v20 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v21 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v22 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v23 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v8, v8, v24 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v9, v9, v25 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v10, v10, v26 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v11, v11, v27 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v12, v12, v28 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v13, v13, v29 +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v14, v14, v30 +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v15, v15, v16 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <32 x half> @llvm.minimumnum.v32f16(<32 x half> %x, <32 x half> %y) + ret <32 x half> %result +} + +define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) { +; GFX7-SDAG-LABEL: v_minimumnum_v2f32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 
v1, v1, v2 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v2f32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v2f32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v2f32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_v2f32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v2f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; 
GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v2f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v2 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v2f32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v2f32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v2f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-SDAG-NEXT: 
v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v2f32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v2f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v2f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> %x, <2 x float> %y) ret <2 x 
float> %result } @@ -3794,87 +6591,190 @@ define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) { } define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) { -; GFX7-LABEL: v_minimumnum_v3f32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v4 -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v5 -; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v3f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v4 -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v5 -; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX8-NEXT: v_min_f32_e32 v2, v2, v3 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_v3f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v4, v4 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v3f32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v5, 
v5, v5 -; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX10-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX10-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v3f32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0 -; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1 -; GFX11-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4 -; GFX11-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v3f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0 -; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1 -; GFX12-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 -; GFX12-NEXT: v_min_num_f32_e32 v2, v2, v5 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v3f32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 +; 
GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v3f32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v3f32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v3f32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_v3f32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: 
v_min_f32_e32 v0, v0, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v3f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v3f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_mov_b32_e32 v6, v3 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v7, v4 +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v5 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v6, v6 +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v7, v7 +; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v3f32: +; 
GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX10-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v3f32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX10-GISEL-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v3f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0 +; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1 +; GFX11-SDAG-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4 +; GFX11-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v3f32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, 
v3 +; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4 +; GFX11-GISEL-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v3f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v2, v2, v5 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v3f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v2, v2, v5 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> %x, <3 x float> %y) ret <3 x 
float> %result } @@ -3934,101 +6834,218 @@ define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) { } define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) { -; GFX7-LABEL: v_minimumnum_v4f32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5 -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v6 -; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v7 -; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3 -; GFX7-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v4f32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v4 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v5 -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v6 -; GFX8-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; GFX8-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX8-NEXT: v_mul_f32_e32 v4, 1.0, v7 -; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v3 -; GFX8-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_v4f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v5, v5 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v6, v6 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v7, v7 -; GFX9-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-NEXT: 
v_min_f32_e32 v3, v3, v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v4f32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v6, v6, v6 -; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX10-NEXT: v_max_f32_e32 v7, v7, v7 -; GFX10-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX10-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX10-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX10-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v4f32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 -; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 -; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7 -; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5 -; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v4f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 -; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 -; GFX12-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 -; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: 
v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 -; GFX12-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v4f32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v4f32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v6, 1.0, v6 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v7, 1.0, v7 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v4f32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; 
GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-SDAG-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v4f32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v5, 1.0, v5 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v6, 1.0, v6 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v7, 1.0, v7 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX8-GISEL-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_v4f32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-SDAG-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v4f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; 
GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX900-GISEL-NEXT: v_max_f32_e32 v6, v6, v6 +; GFX900-GISEL-NEXT: v_max_f32_e32 v7, v7, v7 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX900-GISEL-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v4f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1] +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX950-GISEL-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v4f32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v6, v6, v6 +; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-SDAG-NEXT: v_max_f32_e32 v7, v7, v7 +; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-SDAG-NEXT: 
v_min_f32_e32 v0, v0, v4 +; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX10-SDAG-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX10-SDAG-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v4f32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 +; GFX10-GISEL-NEXT: v_max_f32_e32 v6, v6, v6 +; GFX10-GISEL-NEXT: v_max_f32_e32 v7, v7, v7 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX10-GISEL-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX10-GISEL-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v4f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 +; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-SDAG-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7 +; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5 +; GFX11-SDAG-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v4f32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 +; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 +; 
GFX11-GISEL-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5 +; GFX11-GISEL-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v4f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 +; GFX12-SDAG-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v4f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_dual_min_num_f32 
v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 +; GFX12-GISEL-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %result } @@ -4092,88 +7109,171 @@ define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) { } define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) { -; GFX7-LABEL: v_minimumnum_v2f64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v2f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v2f64: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v2f64: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_min_f64 
v[0:1], v[0:1], v[4:5] -; GFX950-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v2f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v2f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v2f64: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] -; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v2f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: 
v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v2f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v2f64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v2f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v2f64: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-SDAG-NEXT: v_max_f64 
v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v2f64: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v2f64: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v2f64: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v2f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-SDAG-NEXT: 
v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v2f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v2f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v2f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v2f64: +; 
GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v2f64: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y) ret <2 x double> %result } @@ -4229,109 +7329,213 @@ define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) { } define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) { -; GFX7-LABEL: v_minimumnum_v3f64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; 
GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v3f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v3f64: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v3f64: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX950-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX950-NEXT: v_max_f64 v[6:7], 
v[10:11], v[10:11] -; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v3f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v3f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v3f64: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] -; 
GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] -; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v3f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX7-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v3f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX7-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v3f64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX8-SDAG-NEXT: 
v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX8-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v3f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX8-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v3f64: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX900-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v3f64: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-GISEL-NEXT: v_max_f64 
v[10:11], v[10:11], v[10:11] +; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX900-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v3f64: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v3f64: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX950-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v3f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; 
GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX10-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v3f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX10-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v3f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX11-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v3f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: v_max_f64 v[8:9], 
v[8:9], v[8:9] +; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX11-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v3f64: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v3f64: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-GISEL-NEXT: 
v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y) ret <3 x double> %result } @@ -4344,179 +7548,304 @@ define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) { ; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] ; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v3f64_nnan: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_v3f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v3f64_nnan: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v3f64_nnan: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v3f64_nnan: -; GFX12: ; %bb.0: -; GFX12-NEXT: 
s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] -; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] -; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] -; GFX12-NEXT: s_setpc_b64 s[30:31] - %result = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y) - ret <3 x double> %result -} - -define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) { -; GFX7-LABEL: v_minimumnum_v4f64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] -; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] -; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v4f64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] -; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] -; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; +; 
GFX8-LABEL: v_minimumnum_v3f64_nnan: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_minimumnum_v4f64: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] -; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] -; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX950-LABEL: v_minimumnum_v4f64: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX950-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11] -; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX950-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13] -; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9] -; GFX950-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15] -; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9] -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v4f64: +; GFX9-LABEL: v_minimumnum_v3f64_nnan: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 
v[6:7] +; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_minimumnum_v3f64_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX10-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] -; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] -; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_minimumnum_v4f64: +; GFX11-LABEL: v_minimumnum_v3f64_nnan: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] -; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX11-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] -; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] -; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX11-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX12-LABEL: v_minimumnum_v4f64: +; GFX12-LABEL: v_minimumnum_v3f64_nnan: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] -; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] -; GFX12-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] -; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] -; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9] -; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11] -; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX12-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15] +; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] +; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] +; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] ; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y) + ret <3 x double> %result +} + +define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) { +; GFX7-SDAG-LABEL: v_minimumnum_v4f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; 
GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX7-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX7-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v4f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX7-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX7-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX7-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX7-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v4f64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX8-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX8-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v4f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX8-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX8-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX8-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX8-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v4f64: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX900-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX900-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX900-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v4f64: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; 
GFX900-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX900-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX900-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX900-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v4f64: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v4f64: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX950-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX950-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX950-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; 
GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX950-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v4f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX10-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX10-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v4f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX10-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX10-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX10-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX10-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v4f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[8:9], 
v[8:9], v[8:9] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX11-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v4f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX11-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX11-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX11-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v4f64: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], 
v[0:1] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9] +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11] +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v4f64: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9] +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11] +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> %x, <4 x double> %y) ret <4 x double> %result } @@ -4584,97 
+7913,183 @@ define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) { } define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 { -; GFX7-LABEL: v_minimumnum_f16_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f16_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f16_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f16_no_ieee: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-TRUE16-LABEL: v_minimumnum_f16_no_ieee: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l -; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_minimumnum_f16_no_ieee: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; 
GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-TRUE16-LABEL: v_minimumnum_f16_no_ieee: -; GFX12-TRUE16: ; %bb.0: -; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h -; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-FAKE16-LABEL: v_minimumnum_f16_no_ieee: -; GFX12-FAKE16: ; %bb.0: -; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 -; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 -; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX8-SDAG: ; %bb.0: +; 
GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX11-TRUE16-SDAG: ; %bb.0: +; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l +; 
GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX11-TRUE16-GISEL: ; %bb.0: +; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l +; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX11-FAKE16-SDAG: ; %bb.0: +; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX11-FAKE16-GISEL: ; %bb.0: +; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX12-TRUE16-SDAG: ; %bb.0: +; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX12-TRUE16-GISEL: ; %bb.0: +; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l +; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l +; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h +; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: +; GFX12-FAKE16-SDAG: ; %bb.0: +; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_no_ieee: +; GFX12-FAKE16-GISEL: ; %bb.0: +; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 +; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 +; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call half 
@llvm.minimumnum.f16(half %x, half %y) ret half %result } define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 { -; GFX7-LABEL: v_minimumnum_f16_nan_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f16_nan_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f16_nan_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimumnum_f16_nan_no_ieee: ; GFX8: ; %bb.0: @@ -4730,57 +8145,109 @@ define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 { } define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 { -; GFX7-LABEL: v_minimumnum_f32_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_f32_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: 
v_minimumnum_f32_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f32_no_ieee: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f32_no_ieee: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f32_no_ieee: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f32_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f32_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f32_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f32_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f32_no_ieee: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f32_no_ieee: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f32_no_ieee: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f32_no_ieee: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f32_no_ieee: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f32_no_ieee: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f32_no_ieee: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f32_no_ieee: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.minimumnum.f32(float %x, float %y) ret float %result } @@ -4830,59 +8297,113 @@ define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 { } define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 { -; GFX7-LABEL: v_minimumnum_f64_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 
s[30:31] -; -; GFX8-LABEL: v_minimumnum_f64_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_minimumnum_f64_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_f64_no_ieee: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_f64_no_ieee: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_f64_no_ieee: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] -; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_f64_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-SDAG-NEXT: v_min_f64 v[0:1], 
v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_f64_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_f64_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_f64_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_minimumnum_f64_no_ieee: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_minimumnum_f64_no_ieee: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_f64_no_ieee: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], 
v[2:3] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_f64_no_ieee: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_f64_no_ieee: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_f64_no_ieee: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_f64_no_ieee: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_f64_no_ieee: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 
+; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] +; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.minimumnum.f64(double %x, double %y) ret double %result } @@ -4932,106 +8453,199 @@ define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 { } define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 { -; GFX7-LABEL: v_minimumnum_v2f16_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v2f16_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX8-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-LABEL: v_minimumnum_v2f16_no_ieee: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX900-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX900-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] 
-; -; GFX950-LABEL: v_minimumnum_v2f16_no_ieee: -; GFX950: ; %bb.0: -; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX950-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX950-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_minimumnum_v2f16_no_ieee: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX10-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX10-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_minimumnum_v2f16_no_ieee: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX12-LABEL: v_minimumnum_v2f16_no_ieee: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: 
v_minimumnum_v2f16_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1 +; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v2, v3 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: s_nop 0 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-GISEL-NEXT: s_nop 0 +; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y) ret <2 x half> %result } define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) #0 { -; GFX7-LABEL: v_minimumnum_v2f16_nnan_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: 
v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v2f16_nnan_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v2f16_nnan_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v2f16_nnan_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v2f16_nnan_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v2f16_nnan_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_minimumnum_v2f16_nnan_no_ieee: ; GFX9: ; %bb.0: @@ -5065,34 +8679,60 @@ define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) } define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) #0 { -; GFX7-LABEL: v_minimumnum_v3f16_nnan_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v3f16_nnan_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_min_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: 
s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 
s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_minimumnum_v3f16_nnan_no_ieee: ; GFX9: ; %bb.0: @@ -5130,41 +8770,73 @@ define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) } define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) #0 { -; GFX7-LABEL: v_minimumnum_v4f16_nnan_no_ieee: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 -; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 -; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 -; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 -; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 -; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 -; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX7-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX7-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX7-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX7-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_minimumnum_v4f16_nnan_no_ieee: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX8-NEXT: 
v_min_f16_e32 v1, v1, v3 -; GFX8-NEXT: v_min_f16_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: v_minimumnum_v4f16_nnan_no_ieee: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: v_minimumnum_v4f16_nnan_no_ieee: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 +; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 +; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, 
v5 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_minimumnum_v4f16_nnan_no_ieee: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 +; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 +; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: v_minimumnum_v4f16_nnan_no_ieee: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v1, v3 +; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_minimumnum_v4f16_nnan_no_ieee: ; GFX9: ; %bb.0: @@ -5202,3 +8874,6 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) } attributes #0 = { "amdgpu-ieee"="false" } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX900: {{.*}} +; GFX950: {{.*}}