From 815b7c366a93dab2698cd42688551d2ad1188525 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 22 Jul 2025 13:33:14 +0100 Subject: [PATCH 1/3] Add unittest to improve test coverage for SelectionDAG::getNode(). It is easier to validate the undef/poison behaviour by probing getNode directly rather than relying on llc tests. --- llvm/unittests/CodeGen/CMakeLists.txt | 1 + .../SelectionDAGAddressAnalysisTest.cpp | 95 +----- .../SelectionDAGNodeConstructionTest.cpp | 301 ++++++++++++++++++ .../CodeGen/SelectionDAGPatternMatchTest.cpp | 94 +----- llvm/unittests/CodeGen/SelectionDAGTestBase.h | 99 ++++++ 5 files changed, 405 insertions(+), 185 deletions(-) create mode 100644 llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp create mode 100644 llvm/unittests/CodeGen/SelectionDAGTestBase.h diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index d19b122676c9b..22dbdaa4fa82e 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -42,6 +42,7 @@ add_llvm_unittest(CodeGenTests ScalableVectorMVTsTest.cpp SchedBoundary.cpp SelectionDAGAddressAnalysisTest.cpp + SelectionDAGNodeConstructionTest.cpp SelectionDAGPatternMatchTest.cpp TypeTraitsTest.cpp TargetOptionsTest.cpp diff --git a/llvm/unittests/CodeGen/SelectionDAGAddressAnalysisTest.cpp b/llvm/unittests/CodeGen/SelectionDAGAddressAnalysisTest.cpp index 0058daf680816..7ad7a51091ceb 100644 --- a/llvm/unittests/CodeGen/SelectionDAGAddressAnalysisTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGAddressAnalysisTest.cpp @@ -7,103 +7,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "SelectionDAGTestBase.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/AsmParser/Parser.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include 
"llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Target/TargetMachine.h" -#include "gtest/gtest.h" namespace llvm { -class SelectionDAGAddressAnalysisTest : public testing::Test { -protected: - static void SetUpTestCase() { - InitializeAllTargets(); - InitializeAllTargetMCs(); - } - - void SetUp() override { - StringRef Assembly = "@g = global i32 0\n" - "@g_alias = alias i32, i32* @g\n" - "define i32 @f() {\n" - " %1 = load i32, i32* @g\n" - " ret i32 %1\n" - "}"; - - Triple TargetTriple("aarch64--"); - std::string Error; - const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error); - // FIXME: These tests do not depend on AArch64 specifically, but we have to - // initialize a target. A skeleton Target for unittests would allow us to - // always run these tests. - if (!T) - GTEST_SKIP(); - - TargetOptions Options; - TM = std::unique_ptr( - T->createTargetMachine(TargetTriple, "", "+sve", Options, std::nullopt, - std::nullopt, CodeGenOptLevel::Aggressive)); - if (!TM) - GTEST_SKIP(); - - SMDiagnostic SMError; - M = parseAssemblyString(Assembly, SMError, Context); - if (!M) - report_fatal_error(SMError.getMessage()); - M->setDataLayout(TM->createDataLayout()); - - F = M->getFunction("f"); - if (!F) - report_fatal_error("F?"); - G = M->getGlobalVariable("g"); - if (!G) - report_fatal_error("G?"); - AliasedG = M->getNamedAlias("g_alias"); - if (!AliasedG) - report_fatal_error("AliasedG?"); - - MachineModuleInfo MMI(TM.get()); - - MF = std::make_unique(*F, *TM, *TM->getSubtargetImpl(*F), - MMI.getContext(), 0); - - DAG = std::make_unique(*TM, CodeGenOptLevel::None); - if (!DAG) - report_fatal_error("DAG?"); - OptimizationRemarkEmitter ORE(F); - FunctionAnalysisManager FAM; - FAM.registerPass([&] { return TM->getTargetIRAnalysis(); }); - - TargetTransformInfo TTI = 
TM->getTargetIRAnalysis().run(*F, FAM); - DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI, - nullptr, TTI.hasBranchDivergence(F)); - } - - TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) { - return DAG->getTargetLoweringInfo().getTypeAction(Context, VT); - } - - EVT getTypeToTransformTo(EVT VT) { - return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT); - } - - LLVMContext Context; - std::unique_ptr TM; - std::unique_ptr M; - Function *F; - GlobalVariable *G; - GlobalAlias *AliasedG; - std::unique_ptr MF; - std::unique_ptr DAG; -}; +class SelectionDAGAddressAnalysisTest : public SelectionDAGTestBase {}; TEST_F(SelectionDAGAddressAnalysisTest, sameFrameObject) { SDLoc Loc; diff --git a/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp b/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp new file mode 100644 index 0000000000000..1016790637b82 --- /dev/null +++ b/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp @@ -0,0 +1,301 @@ +//===---- llvm/unittest/CodeGen/SelectionDAGPatternMatchTest.cpp ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SelectionDAGTestBase.h" + +using namespace llvm; + +class SelectionDAGNodeConstructionTest : public SelectionDAGTestBase {}; + +TEST_F(SelectionDAGNodeConstructionTest, ADD) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Poison), Undef); + + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Op), Undef); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Undef), Undef); +} + +TEST_F(SelectionDAGNodeConstructionTest, AND) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Poison), Zero); + + EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Op), Zero); + // TODO: Should be undef. 
+ EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Undef), Zero); +} + +TEST_F(SelectionDAGNodeConstructionTest, MUL) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Poison), Zero); + + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Op), Zero); + // TODO: Should be undef. + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Undef), Zero); +} + +TEST_F(SelectionDAGNodeConstructionTest, OR) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue AllOnes = DAG->getAllOnesConstant(DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Poison), AllOnes); + + EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Op, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Op), AllOnes); + // TODO: Should be undef. 
+ EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Undef), AllOnes); +} + +TEST_F(SelectionDAGNodeConstructionTest, SADDSAT) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue AllOnes = DAG->getAllOnesConstant(DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Poison), AllOnes); + + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Op, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Op), AllOnes); + // TODO: Should be undef. + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Undef), AllOnes); +} + +TEST_F(SelectionDAGNodeConstructionTest, SDIV) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Op, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Poison, Op), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Poison, Undef), Undef); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Poison), Undef); + + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Op), Zero); + // TODO: Should be poison. 
+ EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Undef), Undef); +} + +TEST_F(SelectionDAGNodeConstructionTest, SREM) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Op, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Poison, Op), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Poison, Undef), Undef); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Poison), Undef); + + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Op), Zero); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Undef), Undef); +} + +TEST_F(SelectionDAGNodeConstructionTest, SSUBSAT) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Poison), Zero); + + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Op), Zero); + // TODO: Should be undef. 
+ EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Undef), Zero); +} + +TEST_F(SelectionDAGNodeConstructionTest, SUB) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Poison), Undef); + + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Op), Undef); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Undef), Undef); +} + +TEST_F(SelectionDAGNodeConstructionTest, UADDSAT) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue AllOnes = DAG->getAllOnesConstant(DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Poison), AllOnes); + + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Op, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Op), AllOnes); + // TODO: Should be undef. 
+ EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Undef), AllOnes); +} + +TEST_F(SelectionDAGNodeConstructionTest, UDIV) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Op, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Poison, Op), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Poison, Undef), Undef); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Poison), Undef); + + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Op), Zero); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Undef), Undef); +} + +TEST_F(SelectionDAGNodeConstructionTest, UREM) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Op, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Poison, Op), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Poison, Undef), Undef); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Poison), Undef); + + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Op), Zero); + // TODO: Should be poison. 
+ EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Undef), Undef); +} + +TEST_F(SelectionDAGNodeConstructionTest, USUBSAT) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Poison, Undef), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Poison), Zero); + + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Op), Zero); + // TODO: Should be undef. + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Undef), Zero); +} + +TEST_F(SelectionDAGNodeConstructionTest, XOR) { + SDLoc DL; + EVT Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Poison, Op), Poison); + // TODO: Should be poison. + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Poison, Undef), Zero); + // TODO: Should be poison. 
+ EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Poison), Zero); + + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Op), Undef); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Undef), Zero); +} diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp index 30a1406da7734..4e0bf385d72b2 100644 --- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp @@ -6,102 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/AsmParser/Parser.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "SelectionDAGTestBase.h" #include "llvm/CodeGen/SDPatternMatch.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Target/TargetMachine.h" -#include "gtest/gtest.h" using namespace llvm; -class SelectionDAGPatternMatchTest : public testing::Test { -protected: - static void SetUpTestCase() { - InitializeAllTargets(); - InitializeAllTargetMCs(); - } - - void SetUp() override { - StringRef Assembly = "@g = global i32 0\n" - "@g_alias = alias i32, i32* @g\n" - "define i32 @f() {\n" - " %1 = load i32, i32* @g\n" - " ret i32 %1\n" - "}"; - - Triple TargetTriple("riscv64--"); - std::string Error; - const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error); - // FIXME: These tests do not depend on RISCV specifically, but we have to - // initialize a target. A skeleton Target for unittests would allow us to - // always run these tests. 
- if (!T) - GTEST_SKIP(); - - TargetOptions Options; - TM = std::unique_ptr(T->createTargetMachine( - TargetTriple, "", "+m,+f,+d,+v", Options, std::nullopt, std::nullopt, - CodeGenOptLevel::Aggressive)); - if (!TM) - GTEST_SKIP(); - - SMDiagnostic SMError; - M = parseAssemblyString(Assembly, SMError, Context); - if (!M) - report_fatal_error(SMError.getMessage()); - M->setDataLayout(TM->createDataLayout()); - - F = M->getFunction("f"); - if (!F) - report_fatal_error("F?"); - G = M->getGlobalVariable("g"); - if (!G) - report_fatal_error("G?"); - AliasedG = M->getNamedAlias("g_alias"); - if (!AliasedG) - report_fatal_error("AliasedG?"); - - MachineModuleInfo MMI(TM.get()); - - MF = std::make_unique(*F, *TM, *TM->getSubtargetImpl(*F), - MMI.getContext(), 0); - - DAG = std::make_unique(*TM, CodeGenOptLevel::None); - if (!DAG) - report_fatal_error("DAG?"); - OptimizationRemarkEmitter ORE(F); - FunctionAnalysisManager FAM; - FAM.registerPass([&] { return TM->getTargetIRAnalysis(); }); - - TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM); - DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI, - nullptr, TTI.hasBranchDivergence(F)); - } - - TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) { - return DAG->getTargetLoweringInfo().getTypeAction(Context, VT); - } - - EVT getTypeToTransformTo(EVT VT) { - return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT); - } - - LLVMContext Context; - std::unique_ptr TM; - std::unique_ptr M; - Function *F; - GlobalVariable *G; - GlobalAlias *AliasedG; - std::unique_ptr MF; - std::unique_ptr DAG; -}; +class SelectionDAGPatternMatchTest : public SelectionDAGTestBase {}; TEST_F(SelectionDAGPatternMatchTest, matchValueType) { SDLoc DL; diff --git a/llvm/unittests/CodeGen/SelectionDAGTestBase.h b/llvm/unittests/CodeGen/SelectionDAGTestBase.h new file mode 100644 index 0000000000000..8daf8f398449c --- /dev/null +++ b/llvm/unittests/CodeGen/SelectionDAGTestBase.h @@ -0,0 +1,99 @@ 
+//===---- llvm/unittest/CodeGen/SelectionDAGPatternMatchTest.cpp ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "gtest/gtest.h" + +using namespace llvm; + +class SelectionDAGTestBase : public testing::Test { +protected: + static void SetUpTestCase() { + InitializeAllTargets(); + InitializeAllTargetMCs(); + } + + void SetUp() override { + StringRef Assembly = "@g = global i32 0\n" + "@g_alias = alias i32, i32* @g\n" + "define i32 @f() {\n" + " %1 = load i32, i32* @g\n" + " ret i32 %1\n" + "}"; + + Triple TargetTriple("aarch64--"); + std::string Error; + const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error); + // FIXME: These tests do not depend on AArch64 specifically, but we have to + // initialize a target. A skeleton Target for unittests would allow us to + // always run these tests. 
+ if (!T) + GTEST_SKIP(); + + TargetOptions Options; + TM = std::unique_ptr( + T->createTargetMachine(TargetTriple, "", "+sve", Options, std::nullopt, + std::nullopt, CodeGenOptLevel::Aggressive)); + if (!TM) + GTEST_SKIP(); + + SMDiagnostic SMError; + M = parseAssemblyString(Assembly, SMError, Context); + ASSERT_TRUE(M && "Could not parse module!"); + M->setDataLayout(TM->createDataLayout()); + + F = M->getFunction("f"); + ASSERT_TRUE(F && "Could not get function f!"); + G = M->getGlobalVariable("g"); + ASSERT_TRUE(G && "Could not get global g!"); + AliasedG = M->getNamedAlias("g_alias"); + ASSERT_TRUE(AliasedG && "Could not get alias g_alias!"); + + MachineModuleInfo MMI(TM.get()); + + MF = std::make_unique(*F, *TM, *TM->getSubtargetImpl(*F), + MMI.getContext(), 0); + + DAG = std::make_unique(*TM, CodeGenOptLevel::None); + if (!DAG) + reportFatalUsageError("Failed to create SelectionDAG?"); + OptimizationRemarkEmitter ORE(F); + FunctionAnalysisManager FAM; + FAM.registerPass([&] { return TM->getTargetIRAnalysis(); }); + + TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM); + DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI, + nullptr, TTI.hasBranchDivergence(F)); + } + + TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) { + return DAG->getTargetLoweringInfo().getTypeAction(Context, VT); + } + + EVT getTypeToTransformTo(EVT VT) { + return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT); + } + + LLVMContext Context; + std::unique_ptr TM; + std::unique_ptr M; + Function *F; + GlobalVariable *G; + GlobalAlias *AliasedG; + std::unique_ptr MF; + std::unique_ptr DAG; +}; From 28961337385acdb5662a71e142aa30e253c17e16 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 17 Jul 2025 12:39:50 +0100 Subject: [PATCH 2/3] [LLVM][SelectionDAG] Align poison/undef binop folds with IR. The "at construction" binop folds in SelectionDAG::getNode() have different behaviour when compared to the equivalent LLVM IR. 
This PR makes the behaviour consistent while also extending the coverage to include signed/unsigned max/min operations. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 81 ++- llvm/test/CodeGen/AArch64/combine-and-like.ll | 1 - llvm/test/CodeGen/AMDGPU/saddsat.ll | 2 +- llvm/test/CodeGen/AMDGPU/uaddsat.ll | 5 +- .../test/CodeGen/AMDGPU/vector-reduce-smax.ll | 584 +++++++++--------- .../test/CodeGen/AMDGPU/vector-reduce-smin.ll | 584 +++++++++--------- .../test/CodeGen/AMDGPU/vector-reduce-umax.ll | 553 ++++++++--------- .../test/CodeGen/AMDGPU/vector-reduce-umin.ll | 470 +++++++------- llvm/test/CodeGen/X86/combine-add-ssat.ll | 4 +- llvm/test/CodeGen/X86/combine-add-usat.ll | 5 +- llvm/test/CodeGen/X86/combine-sub-ssat.ll | 4 +- llvm/test/CodeGen/X86/combine-sub-usat.ll | 6 +- llvm/test/CodeGen/X86/load-combine.ll | 6 +- llvm/test/CodeGen/X86/pr33960.ll | 2 - .../SelectionDAGNodeConstructionTest.cpp | 188 +++--- llvm/unittests/CodeGen/SelectionDAGTestBase.h | 2 +- 16 files changed, 1279 insertions(+), 1218 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5c586f73aa125..02d1100c9e7cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7843,20 +7843,43 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } - // Perform trivial constant folding. 
- if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) - return SV; + if (N1.getOpcode() == ISD::POISON || N2.getOpcode() == ISD::POISON) { + switch (Opcode) { + case ISD::XOR: + case ISD::ADD: + case ISD::PTRADD: + case ISD::SUB: + case ISD::SIGN_EXTEND_INREG: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::MUL: + case ISD::AND: + case ISD::SSUBSAT: + case ISD::USUBSAT: + case ISD::UMIN: + case ISD::OR: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::UMAX: + case ISD::SMAX: + case ISD::SMIN: + // fold op(arg1, poison) -> poison, fold op(poison, arg2) -> poison. + return N2.getOpcode() == ISD::POISON ? N2 : N1; + } + } // Canonicalize an UNDEF to the RHS, even over a constant. - if (N1.isUndef()) { + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() != ISD::UNDEF) { if (TLI->isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { switch (Opcode) { case ISD::PTRADD: case ISD::SUB: - // fold op(undef, arg2) -> undef, fold op(poison, arg2) ->poison. - return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); + // fold op(undef, non_undef_arg2) -> undef. + return N1; case ISD::SIGN_EXTEND_INREG: case ISD::UDIV: case ISD::SDIV: @@ -7864,18 +7887,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SREM: case ISD::SSUBSAT: case ISD::USUBSAT: - // fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison. - return N1.getOpcode() == ISD::POISON ? getPOISON(VT) - : getConstant(0, DL, VT); + // fold op(undef, non_undef_arg2) -> 0. + return getConstant(0, DL, VT); } } } // Fold a bunch of operators when the RHS is undef. - if (N2.isUndef()) { + if (N2.getOpcode() == ISD::UNDEF) { switch (Opcode) { case ISD::XOR: - if (N1.isUndef()) + if (N1.getOpcode() == ISD::UNDEF) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). 
return getConstant(0, DL, VT); @@ -7883,29 +7905,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::ADD: case ISD::PTRADD: case ISD::SUB: + // fold op(arg1, undef) -> undef. + return N2; case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: - // fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); + // fold op(arg1, undef) -> poison. + return getPOISON(VT); case ISD::MUL: case ISD::AND: case ISD::SSUBSAT: case ISD::USUBSAT: - // fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) - : getConstant(0, DL, VT); + case ISD::UMIN: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> 0. + return N1.getOpcode() == ISD::UNDEF ? N2 : getConstant(0, DL, VT); case ISD::OR: case ISD::SADDSAT: case ISD::UADDSAT: - // fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison) -> - // poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) - : getAllOnesConstant(DL, VT); + case ISD::UMAX: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> -1. + return N1.getOpcode() == ISD::UNDEF ? N2 : getAllOnesConstant(DL, VT); + case ISD::SMAX: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MAX_INT. + return N1.getOpcode() == ISD::UNDEF + ? N2 + : getConstant( + APInt::getSignedMaxValue(VT.getScalarSizeInBits()), DL, + VT); + case ISD::SMIN: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MIN_INT. + return N1.getOpcode() == ISD::UNDEF + ? N2 + : getConstant( + APInt::getSignedMinValue(VT.getScalarSizeInBits()), DL, + VT); } } + // Perform trivial constant folding. + if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) + return SV; + // Memoize this node if possible. 
SDNode *N; SDVTList VTs = getVTList(VT); diff --git a/llvm/test/CodeGen/AArch64/combine-and-like.ll b/llvm/test/CodeGen/AArch64/combine-and-like.ll index 15770c2e02ffd..ea1359b300620 100644 --- a/llvm/test/CodeGen/AArch64/combine-and-like.ll +++ b/llvm/test/CodeGen/AArch64/combine-and-like.ll @@ -4,7 +4,6 @@ define i32 @f(i32 %a0) { ; CHECK-LABEL: f: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %1 = lshr i32 %a0, 2147483647 %2 = add i32 %1, 2147483647 diff --git a/llvm/test/CodeGen/AMDGPU/saddsat.ll b/llvm/test/CodeGen/AMDGPU/saddsat.ll index 4e27cf20d3c98..019eb2c661edc 100644 --- a/llvm/test/CodeGen/AMDGPU/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/saddsat.ll @@ -235,7 +235,7 @@ define <3 x i16> @v_saddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_med3_i32 v3, v2, s4, v4 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_or_b32_e32 v2, 0xffff0000, v3 +; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 ; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll index 79adc9ead62e1..923017400adb1 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll @@ -202,10 +202,9 @@ define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) { ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; GFX6-NEXT: v_min_u32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_min_u32_e32 v3, 0xffff, v2 +; GFX6-NEXT: v_min_u32_e32 v2, 0xffff, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_or_b32_e32 v2, 0xffff0000, v3 -; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16 +; GFX6-NEXT: v_alignbit_b32 v1, v2, v1, 16 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_uaddsat_v3i16: diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll index c12265bd7f372..ed2f06b8136a2 100644 --- 
a/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll @@ -604,18 +604,18 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i8: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v6 -; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v7 -; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -698,15 +698,15 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX9-SDAG-LABEL: test_vector_reduce_smax_v8i8: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX9-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX9-SDAG-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3 -; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX9-SDAG-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX9-SDAG-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7 +; GFX9-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 +; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2 +; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v2, v6 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -741,20 +741,20 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX10-SDAG-LABEL: test_vector_reduce_smax_v8i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX10-SDAG-NEXT: v_max_i16 v3, v3, v7 +; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3 -; GFX10-SDAG-NEXT: v_bfe_i32 v3, v6, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v5 +; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7 +; GFX10-SDAG-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v4, v6, 0, 8 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SDAG-NEXT: v_max_i16 v2, v2, v3 +; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v3 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2 
+; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v2, v4 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -796,62 +796,62 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v8i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v5.l, v3.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l +; 
GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5 ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v2.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v8i8: ; 
GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v5 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v3 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v3 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v2 +; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v4 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1 @@ -906,39 +906,39 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; 
GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v5.l, v3.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: 
v_lshlrev_b16 v5.l, 8, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5 ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v2.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -949,23 +949,23 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: 
v_bfe_i32 v5, v5, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v5 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v3 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v3 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v2 +; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v4 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1 @@ -1025,32 +1025,32 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i8: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 -; 
GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15 -; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14 -; GFX7-SDAG-NEXT: v_max_i32_e32 v4, v4, v12 -; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8 -; GFX7-SDAG-NEXT: v_max_i32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9 -; GFX7-SDAG-NEXT: v_max3_i32 v2, v2, v10, v6 -; GFX7-SDAG-NEXT: v_max3_i32 v3, v3, v11, v7 -; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v12 +; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v13 +; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -1165,21 +1165,21 @@ define i8 
@test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX9-SDAG-LABEL: test_vector_reduce_smax_v16i8: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 -; GFX9-SDAG-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 +; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max3_i16 v3, v3, v11, v7 -; GFX9-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX9-SDAG-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3 -; GFX9-SDAG-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v13 +; GFX9-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8 +; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max3_i16 v2, v2, v10, v6 +; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7 +; GFX9-SDAG-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 
src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v12 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2 +; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v2, v6 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -1222,34 +1222,34 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX10-SDAG-LABEL: test_vector_reduce_smax_v16i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v9 +; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v9, v12, 0, 8 ; GFX10-SDAG-NEXT: v_max_i16 v7, v7, v15 +; GFX10-SDAG-NEXT: v_max_i16 v3, v3, v11 +; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v13 +; GFX10-SDAG-NEXT: v_bfe_i32 v5, v8, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v8, v10, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX10-SDAG-NEXT: v_max_i16 v5, v5, v13 -; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v9 -; GFX10-SDAG-NEXT: v_max3_i16 v3, v3, v11, v7 -; GFX10-SDAG-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7 +; GFX10-SDAG-NEXT: v_bfe_i32 v3, v14, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 
v8, v8, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3 -; GFX10-SDAG-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX10-SDAG-NEXT: v_max_i16 v5, v6, v7 +; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v5 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SDAG-NEXT: v_max_i16 v3, v4, v3 -; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v8 -; GFX10-SDAG-NEXT: v_max3_i16 v2, v2, v10, v5 +; GFX10-SDAG-NEXT: v_max_i16 v3, v6, v3 +; GFX10-SDAG-NEXT: v_max_i16 v2, v2, v8 +; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v4, v9 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v3, v2 +; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v2, v3 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -1307,59 +1307,58 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v16i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 
0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v6.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 -; 
GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.h, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.h, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v2.l, v3.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v0.h, v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, 
v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.h, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l @@ -1368,37 +1367,37 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v16i8: ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15 +; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v11 +; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v13 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v5, v5, v13 -; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9 -; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v3, v3, v11, v7 -; 
GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v5, v6, v7 +; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v5 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v4, v3 +; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v6, v3 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v8 -; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v2, v2, v10, v5 +; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v8 +; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v9 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v3, v2 +; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v3 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1 @@ -1468,59 +1467,58 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: 
v_mov_b16_e32 v0.l, v9.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l +; 
GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v6.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.h, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.h, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | 
instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v2.l, v3.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v0.h, v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.h, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l @@ -1533,37 +1531,37 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_max_i16 
v1, v1, v9 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15 +; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v11 +; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v13 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v5, v5, v13 -; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9 -; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v3, v3, v11, v7 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v5, v6, v7 +; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v5 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v4, v3 +; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v6, v3 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v8 -; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v2, v2, v10, v5 +; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v8 +; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v9 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v3, v2 +; GFX12-SDAG-FAKE16-NEXT: 
v_max3_i16 v0, v0, v2, v3 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1 @@ -2055,18 +2053,18 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i16: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 -; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v6 -; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v7 -; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2253,32 +2251,32 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i16: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 
v10, v10, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16 -; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15 -; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14 -; GFX7-SDAG-NEXT: v_max_i32_e32 v4, v4, v12 -; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8 -; GFX7-SDAG-NEXT: v_max_i32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 ; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9 -; GFX7-SDAG-NEXT: v_max3_i32 v2, v2, v10, v6 -; GFX7-SDAG-NEXT: v_max3_i32 v3, v3, v11, v7 -; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v12 +; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v13 +; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll index 5056747c33cc2..8812cae20f110 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll +++ 
b/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll @@ -604,18 +604,18 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i8: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v6 -; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v7 -; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -698,15 +698,15 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX9-SDAG-LABEL: test_vector_reduce_smin_v8i8: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX9-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX9-SDAG-NEXT: v_min_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3 -; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX9-SDAG-NEXT: v_min_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_bfe_i32 
v7, v7, 0, 8 +; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX9-SDAG-NEXT: v_min_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7 +; GFX9-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 +; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2 +; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v2, v6 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -741,20 +741,20 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX10-SDAG-LABEL: test_vector_reduce_smin_v8i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX10-SDAG-NEXT: v_min_i16 v3, v3, v7 +; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3 -; GFX10-SDAG-NEXT: v_bfe_i32 v3, v6, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v5 +; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7 +; GFX10-SDAG-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v4, v6, 0, 8 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SDAG-NEXT: v_min_i16 v2, v2, v3 +; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v3 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2 +; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v2, v4 ; 
GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -796,62 +796,62 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v8i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v5.l, v3.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8 +; 
GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5 ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v2.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v8i8: ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; 
GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v5 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v3 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v3 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v2 +; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v4 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1 @@ -906,39 +906,39 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; 
GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v5.l, v3.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l +; 
GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5 ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v2.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -949,23 +949,23 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; 
GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v5 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v3 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v3 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v2 +; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v4 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1 @@ -1025,32 +1025,32 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i8: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 
8 -; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8 -; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15 -; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14 -; GFX7-SDAG-NEXT: v_min_i32_e32 v4, v4, v12 -; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8 -; GFX7-SDAG-NEXT: v_min_i32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8 +; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9 -; GFX7-SDAG-NEXT: v_min3_i32 v2, v2, v10, v6 -; GFX7-SDAG-NEXT: v_min3_i32 v3, v3, v11, v7 -; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v12 +; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v13 +; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -1165,21 +1165,21 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; 
GFX9-SDAG-LABEL: test_vector_reduce_smin_v16i8: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 -; GFX9-SDAG-NEXT: v_min_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 +; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min3_i16 v3, v3, v11, v7 -; GFX9-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX9-SDAG-NEXT: v_min_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3 -; GFX9-SDAG-NEXT: v_min_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v13 +; GFX9-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8 +; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min3_i16 v2, v2, v10, v6 +; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7 +; GFX9-SDAG-NEXT: v_min_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min3_i16 v0, 
v0, v4, v12 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2 +; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v2, v6 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -1222,34 +1222,34 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; GFX10-SDAG-LABEL: test_vector_reduce_smin_v16i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v9 +; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v9, v12, 0, 8 ; GFX10-SDAG-NEXT: v_min_i16 v7, v7, v15 +; GFX10-SDAG-NEXT: v_min_i16 v3, v3, v11 +; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v13 +; GFX10-SDAG-NEXT: v_bfe_i32 v5, v8, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX10-SDAG-NEXT: v_bfe_i32 v8, v10, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX10-SDAG-NEXT: v_min_i16 v5, v5, v13 -; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v9 -; GFX10-SDAG-NEXT: v_min3_i16 v3, v3, v11, v7 -; GFX10-SDAG-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7 +; GFX10-SDAG-NEXT: v_bfe_i32 v3, v14, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v4, 
v4, 0, 8 -; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3 -; GFX10-SDAG-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX10-SDAG-NEXT: v_min_i16 v5, v6, v7 +; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v5 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SDAG-NEXT: v_min_i16 v3, v4, v3 -; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v8 -; GFX10-SDAG-NEXT: v_min3_i16 v2, v2, v10, v5 +; GFX10-SDAG-NEXT: v_min_i16 v3, v6, v3 +; GFX10-SDAG-NEXT: v_min_i16 v2, v2, v8 +; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v4, v9 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v3, v2 +; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v2, v3 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -1307,59 +1307,58 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v16i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, 
v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v6.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.h, v0.l -; 
GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.h, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v2.l, v3.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v0.h, v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.h, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l @@ -1368,37 +1367,37 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v16i8: ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15 +; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v11 +; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v13 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v5, v5, v13 -; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9 -; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v3, v3, v11, v7 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8 
+; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v5, v6, v7 +; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v5 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v4, v3 +; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v6, v3 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v8 -; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v2, v2, v10, v5 +; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v8 +; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v9 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v3, v2 +; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v3 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1 @@ -1468,59 +1467,58 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l -; GFX12-SDAG-TRUE16-NEXT: 
v_bfe_i32 v11, v11, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l ; 
GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v6.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.h, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.h, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 
v0.h, v2.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v2.l, v3.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v0.h, v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.h, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l @@ -1533,37 +1531,37 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 
8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15 +; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v11 +; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v13 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v5, v5, v13 -; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9 -; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v3, v3, v11, v7 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v5, v6, v7 +; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v5 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v4, v3 +; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v6, v3 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v8 -; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v2, v2, v10, v5 +; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v8 +; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v9 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v3, v2 +; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v3 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1 @@ -2055,18 +2053,18 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i16: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 -; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v6 -; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v7 -; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2253,32 +2251,32 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i16: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 +; 
GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16 -; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16 -; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15 -; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14 -; GFX7-SDAG-NEXT: v_min_i32_e32 v4, v4, v12 -; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8 -; GFX7-SDAG-NEXT: v_min_i32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16 +; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 ; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9 -; GFX7-SDAG-NEXT: v_min3_i32 v2, v2, v10, v6 -; GFX7-SDAG-NEXT: v_min3_i32 v3, v3, v11, v7 -; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v12 +; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v13 +; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll index ddae1b296024e..82eb122f9f703 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll @@ -320,7 +320,7 @@ define 
i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) { ; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-GISEL-LABEL: test_vector_reduce_umax_v4i8: @@ -351,8 +351,9 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) { ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -387,9 +388,9 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) { ; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v3 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 8 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -429,8 +430,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) { ; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; 
GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] @@ -446,8 +447,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) { ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1 ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] @@ -500,8 +501,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] @@ -521,8 +522,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] @@ -572,18 +573,18 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i8: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v6 -; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v7 -; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -628,7 +629,7 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) { ; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-GISEL-LABEL: test_vector_reduce_umax_v8i8: @@ -660,17 +661,17 @@ define i8 
@test_vector_reduce_umax_v8i8(<8 x i8> %v) { ; GFX9-SDAG-LABEL: test_vector_reduce_umax_v8i8: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX9-SDAG-NEXT: v_max_u16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3 -; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX9-SDAG-NEXT: v_max_u16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7 +; GFX9-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2 +; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v6 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX9-SDAG-NEXT: v_max_u16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: test_vector_reduce_umax_v8i8: @@ -702,21 +703,21 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) { ; GFX10-SDAG-LABEL: test_vector_reduce_umax_v8i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX10-SDAG-NEXT: 
v_and_b32_e32 v2, 0xff, v2 -; GFX10-SDAG-NEXT: v_max_u16 v3, v3, v7 +; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX10-SDAG-NEXT: v_max_u16 v2, v2, v6 -; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3 +; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v5 +; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7 ; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 8 +; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v6 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v3, v2 -; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v3 +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v4 +; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v1 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -756,50 +757,49 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v8i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v3.l, v1.h -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v1.l, v1.l, v3.h, v1.h +; GFX11-SDAG-TRUE16-NEXT: 
s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v1.h, v0.h -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v3.l, v3.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.l, v0.h -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v3 +; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v0.h +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8: ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; 
GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v5 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v3 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v3 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v4 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] @@ -852,27 +852,26 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) { ; 
GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v3.l, v1.h -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v1.l, v1.l, v3.h, v1.h +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v1.h, v0.h -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v3.l, v3.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.l, v0.h -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l ; 
GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v3 +; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v0.h +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8: @@ -882,24 +881,24 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v5 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v3 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v3 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v4 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] @@ -957,32 +956,32 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i8: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15 -; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14 -; 
GFX7-SDAG-NEXT: v_max_u32_e32 v4, v4, v12 -; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8 -; GFX7-SDAG-NEXT: v_max_u32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9 -; GFX7-SDAG-NEXT: v_max3_u32 v2, v2, v10, v6 -; GFX7-SDAG-NEXT: v_max3_u32 v3, v3, v11, v7 -; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v12 +; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v13 +; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -1051,9 +1050,8 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) { ; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2 ; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-GISEL-LABEL: 
test_vector_reduce_umax_v16i8: @@ -1093,25 +1091,24 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) { ; GFX9-SDAG-LABEL: test_vector_reduce_umax_v16i8: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX9-SDAG-NEXT: v_max_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX9-SDAG-NEXT: v_max_u16_sdwa v5, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max3_u16 v3, v3, v11, v7 -; GFX9-SDAG-NEXT: v_max_u16_sdwa v4, v4, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX9-SDAG-NEXT: v_max_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max_u16_sdwa v3, v3, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_max3_u16 v2, v2, v10, v6 -; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3 -; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2 +; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v13 +; GFX9-SDAG-NEXT: v_max_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_max_u16_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; 
GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v12 +; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7 +; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v6 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: test_vector_reduce_umax_v16i8: @@ -1151,38 +1148,38 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) { ; GFX10-SDAG-LABEL: test_vector_reduce_umax_v16i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-SDAG-NEXT: v_max_u16 v7, v7, v15 -; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v9 ; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 ; 
GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX10-SDAG-NEXT: v_max_u16 v5, v5, v13 -; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v9 -; GFX10-SDAG-NEXT: v_max_u16 v6, v6, v14 -; GFX10-SDAG-NEXT: v_max3_u16 v3, v3, v11, v7 -; GFX10-SDAG-NEXT: v_max_u16 v4, v4, v12 +; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX10-SDAG-NEXT: v_max_u16 v7, v7, v15 +; GFX10-SDAG-NEXT: v_max_u16 v3, v3, v11 ; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v8 -; GFX10-SDAG-NEXT: v_max3_u16 v2, v2, v10, v6 -; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3 -; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2 +; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v13 +; GFX10-SDAG-NEXT: v_max_u16 v5, v6, v14 +; GFX10-SDAG-NEXT: v_max_u16 v2, v2, v10 +; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v4, v12 +; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7 +; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v5 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX10-SDAG-NEXT: v_max_u16 v0, v2, v0 +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v1 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: test_vector_reduce_umax_v16i8: @@ -1237,84 +1234,82 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v16i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l 
-; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v10.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v4.h, v5.l, v4.h -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v6.l, v6.h -; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v3.l, v3.h, v0.h -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v3.l, v4.l, v5.h -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v3.h, v6.h, v3.h +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v2.h, v3.l, v2.h +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 
v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v5.l, v5.h +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v6.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v2.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v1.h, v2.l, v2.h, v1.h -; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v4.h, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v4.l, v4.h +; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v2.h, v3.h ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v3.l, v1.h +; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8: ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-SDAG-FAKE16-NEXT: 
v_and_b32_e32 v14, 0xff, v14 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v5, v5, v13 -; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9 -; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v6, v6, v14 -; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v3, v3, v11, v7 -; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v4, v4, v12 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15 +; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v11 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8 +; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v13 +; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v5, v6, v14 +; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v10 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v2, v2, v10, v6 -; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3 +; GFX11-SDAG-FAKE16-NEXT: 
v_max3_u16 v0, v0, v4, v12 +; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2 +; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v5 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -1382,44 +1377,42 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v10.l -; 
GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v4.h, v5.l, v4.h -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v6.l, v6.h -; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v3.l, v3.h, v0.h -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v3.l, v4.l, v5.h -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v3.h, v6.h, v3.h +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v2.h, v3.l, v2.h +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v5.l, v5.h +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v6.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v2.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v1.h, v2.l, v2.h, v1.h -; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v4.h, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v4.l, v4.h +; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v2.h, v3.h ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: 
v_max3_u16 v0.l, v0.l, v3.l, v1.h +; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8: @@ -1429,41 +1422,41 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX12-SDAG-FAKE16-NEXT: 
v_and_b32_e32 v8, 0xff, v8 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v5, v5, v13 -; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9 -; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v6, v6, v14 -; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v3, v3, v11, v7 -; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v4, v4, v12 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15 +; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v11 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8 +; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v13 +; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v5, v6, v14 +; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v10 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v2, v2, v10, v6 -; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3 +; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v12 +; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2 +; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v5 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; 
GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -1940,18 +1933,18 @@ define i16 @test_vector_reduce_umax_v8i16(<8 x i16> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i16: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v6 -; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v7 -; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 +; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2136,32 +2129,32 @@ define i16 @test_vector_reduce_umax_v16i16(<16 x i16> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i16: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 
0xffff, v14 -; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15 -; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14 -; GFX7-SDAG-NEXT: v_max_u32_e32 v4, v4, v12 -; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8 -; GFX7-SDAG-NEXT: v_max_u32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11 +; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 +; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14 +; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9 -; GFX7-SDAG-NEXT: v_max3_u32 v2, v2, v10, v6 -; GFX7-SDAG-NEXT: v_max3_u32 v3, v3, v11, v7 -; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v12 +; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v13 +; 
GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll index e3a7ae5fd0256..115b05a5ca6a2 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll @@ -485,18 +485,18 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i8: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v6 -; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v7 -; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -549,15 +549,15 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) { ; GFX9-SDAG-LABEL: test_vector_reduce_umin_v8i8: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 -; 
GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX9-SDAG-NEXT: v_min_u16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3 -; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX9-SDAG-NEXT: v_min_u16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX9-SDAG-NEXT: v_min_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7 +; GFX9-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX9-SDAG-NEXT: v_min_u16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2 +; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v2, v6 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_min_u16_e32 v0, v0, v1 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -578,20 +578,20 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) { ; GFX10-SDAG-LABEL: test_vector_reduce_umin_v8i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX10-SDAG-NEXT: v_min_u16 v3, v3, v7 +; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX10-SDAG-NEXT: v_min_u16 v2, v2, v6 -; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3 -; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v4 +; 
GFX10-SDAG-NEXT: v_min_u16 v1, v1, v5 +; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v4 +; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7 +; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v6 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 8 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v3, v2 +; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v2, v3 ; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v1 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -620,24 +620,24 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v8i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v3.l, v1.h -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.h, v1.h +; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.l, v3.h +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v6.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v1.h, v0.h -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v3 +; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 ; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l @@ -646,23 +646,23 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) { ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v8i8: ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 +; 
GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v5 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v3 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v3 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2 +; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v4 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1 @@ -699,24 +699,24 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v3.l, v1.h -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: 
v_min3_u16 v1.l, v1.l, v3.h, v1.h +; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.l, v3.h +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v6.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v1.h, v0.h -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v3 +; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 ; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l @@ -729,23 +729,23 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v7 +; 
GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v5 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v3 +; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v3 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2 +; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v4 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1 @@ -787,32 +787,32 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i8: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; 
GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15 -; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14 -; GFX7-SDAG-NEXT: v_min_u32_e32 v4, v4, v12 -; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8 -; GFX7-SDAG-NEXT: v_min_u32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9 -; GFX7-SDAG-NEXT: v_min3_u32 v2, v2, v10, v6 -; GFX7-SDAG-NEXT: v_min3_u32 v3, v3, v11, v7 -; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v12 +; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v13 +; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -899,20 +899,20 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) { ; GFX9-SDAG-LABEL: test_vector_reduce_umin_v16i8: ; 
GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX9-SDAG-NEXT: v_min_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX9-SDAG-NEXT: v_min_u16_sdwa v5, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min3_u16 v3, v3, v11, v7 -; GFX9-SDAG-NEXT: v_min_u16_sdwa v4, v4, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX9-SDAG-NEXT: v_min_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min_u16_sdwa v3, v3, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 -; GFX9-SDAG-NEXT: v_min3_u16 v2, v2, v10, v6 -; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3 -; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2 +; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v13 +; GFX9-SDAG-NEXT: v_min_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min_u16_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v12 +; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7 +; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v2, v6 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 
v1, 8, v1 ; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 8 @@ -944,32 +944,32 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) { ; GFX10-SDAG-LABEL: test_vector_reduce_umin_v16i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-SDAG-NEXT: v_min_u16 v7, v7, v15 -; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX10-SDAG-NEXT: v_min_u16 v1, v1, v9 ; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX10-SDAG-NEXT: v_min_u16 v5, v5, v13 -; GFX10-SDAG-NEXT: v_min_u16 v1, v1, v9 -; GFX10-SDAG-NEXT: v_min_u16 v6, v6, v14 -; GFX10-SDAG-NEXT: v_min3_u16 v3, v3, v11, v7 -; GFX10-SDAG-NEXT: v_min_u16 v4, v4, v12 +; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX10-SDAG-NEXT: v_min_u16 v7, v7, v15 +; GFX10-SDAG-NEXT: v_min_u16 v3, v3, v11 ; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v8 -; GFX10-SDAG-NEXT: v_min3_u16 v2, v2, v10, v6 -; GFX10-SDAG-NEXT: v_min3_u16 
v1, v1, v5, v3 -; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2 +; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v5, v13 +; GFX10-SDAG-NEXT: v_min_u16 v5, v6, v14 +; GFX10-SDAG-NEXT: v_min_u16 v2, v2, v10 +; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v4, v12 +; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7 +; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v2, v5 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 8 @@ -1018,34 +1018,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v16i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v10.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, 
v3.l +; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v4.h, v5.l, v4.h -; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h -; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v6.l, v6.h -; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v3.l, v3.h, v0.h -; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v3.l, v4.l, v5.h -; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l +; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v3.h, v6.h, v3.h +; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v2.h, v3.l, v2.h +; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v5.l, v5.h +; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v6.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v2.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.h, v2.l, v2.h, v1.h -; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v1.l, v4.h, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v4.l, v4.h +; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v2.h, v3.h ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v3.l, v1.h +; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h @@ -1061,34 +1061,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) { ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v16i8: ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; 
GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v5, v5, v13 -; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9 -; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v6, v6, v14 -; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v3, v3, v11, v7 -; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v4, v4, v12 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15 +; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v11 ; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8 +; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v13 +; GFX11-SDAG-FAKE16-NEXT: 
v_min_u16 v5, v6, v14 +; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v10 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v2, v2, v10, v6 -; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3 +; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v12 +; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2 +; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v5 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 @@ -1147,34 +1147,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v10.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l ; 
GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v4.h, v5.l, v4.h -; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h -; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v6.l, v6.h -; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v3.l, v3.h, v0.h -; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v3.l, v4.l, v5.h -; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l +; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v3.h, v6.h, v3.h +; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v2.h, v3.l, v2.h +; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v5.l, v5.h +; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v6.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v2.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v1.h, v2.l, v2.h, v1.h -; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v1.l, v4.h, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v4.l, v4.h +; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v2.h, v3.h ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v3.l, v1.h +; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h ; 
GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h @@ -1194,34 +1194,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v5, v5, v13 -; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9 -; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v6, v6, v14 -; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v3, v3, v11, v7 -; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v4, v4, v12 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX12-SDAG-FAKE16-NEXT: 
v_and_b32_e32 v14, 0xff, v14 +; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15 +; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v11 ; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8 +; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v13 +; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v5, v6, v14 +; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v10 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v2, v2, v10, v6 -; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3 +; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v12 +; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2 +; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v5 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 @@ -1685,18 +1685,18 @@ define i16 @test_vector_reduce_umin_v8i16(<8 x i16> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i16: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v6 -; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v7 -; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-SDAG-NEXT: 
v_and_b32_e32 v6, 0xffff, v6 +; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 +; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v5 +; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -1878,32 +1878,32 @@ define i16 @test_vector_reduce_umin_v16i16(<16 x i16> %v) { ; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i16: ; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15 -; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14 -; GFX7-SDAG-NEXT: v_min_u32_e32 v4, v4, v12 -; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8 -; GFX7-SDAG-NEXT: v_min_u32_e32 v5, v5, v13 +; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11 +; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 +; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10 
+; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14 +; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9 -; GFX7-SDAG-NEXT: v_min3_u32 v2, v2, v10, v6 -; GFX7-SDAG-NEXT: v_min3_u32 v3, v3, v11, v7 -; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3 -; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2 +; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8 +; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14 +; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v10 +; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15 +; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v11 +; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v12 +; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v13 +; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7 +; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll index 3e217980d4a77..75adcddc7b2f3 100644 --- a/llvm/test/CodeGen/X86/combine-add-ssat.ll +++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll @@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() { ; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0] +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0] ; AVX-NEXT: retq %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> , <8 x i16> ) ret <8 x i16> %res diff --git a/llvm/test/CodeGen/X86/combine-add-usat.ll b/llvm/test/CodeGen/X86/combine-add-usat.ll index 13bc3b26e2894..5b947dd6f2b55 100644 --- a/llvm/test/CodeGen/X86/combine-add-usat.ll +++ 
b/llvm/test/CodeGen/X86/combine-add-usat.ll @@ -62,12 +62,13 @@ define <8 x i16> @combine_constfold_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() { ; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65535,65535,65535,2,65535] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535] +; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [65535,65535,2,65535,65535,65535,2,65535] +; AVX-NEXT: # xmm0 = mem[0,0] ; AVX-NEXT: retq %res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> , <8 x i16> ) ret <8 x i16> %res diff --git a/llvm/test/CodeGen/X86/combine-sub-ssat.ll b/llvm/test/CodeGen/X86/combine-sub-ssat.ll index 979331faf19ec..0dab025c19cc1 100644 --- a/llvm/test/CodeGen/X86/combine-sub-ssat.ll +++ b/llvm/test/CodeGen/X86/combine-sub-ssat.ll @@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() { ; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2] +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2] ; AVX-NEXT: retq %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> , <8 x i16> ) ret <8 x i16> %res diff --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll index b70e3fcd779c5..36e374bd2e67c 100644 --- a/llvm/test/CodeGen/X86/combine-sub-usat.ll +++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll @@ -73,17 +73,17 @@ define <8 x i16> @combine_constfold_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() { ; SSE-LABEL: 
combine_constfold_undef_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0] ; SSE-NEXT: retq ; ; AVX1-LABEL: combine_constfold_undef_v8i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] +; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: combine_constfold_undef_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] +; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0] ; AVX2-NEXT: retq ; ; AVX512-LABEL: combine_constfold_undef_v8i16: diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index b5f3e78991881..f21c07599d6f1 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -800,13 +800,13 @@ define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) { ; CHECK-LABEL: shift_i32_by_32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl $-1, 4(%eax) -; CHECK-NEXT: movl $-1, (%eax) +; CHECK-NEXT: movl $0, 4(%eax) +; CHECK-NEXT: movl $0, (%eax) ; CHECK-NEXT: retl ; ; CHECK64-LABEL: shift_i32_by_32: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: movq $-1, (%rdx) +; CHECK64-NEXT: movq $0, (%rdx) ; CHECK64-NEXT: retq entry: %load1 = load i8, ptr %src1, align 1 diff --git a/llvm/test/CodeGen/X86/pr33960.ll b/llvm/test/CodeGen/X86/pr33960.ll index 44fe777e6d140..6ee270e406892 100644 --- a/llvm/test/CodeGen/X86/pr33960.ll +++ b/llvm/test/CodeGen/X86/pr33960.ll @@ -7,12 +7,10 @@ define void @PR33960() { ; X86-LABEL: PR33960: ; X86: # %bb.0: # %entry -; X86-NEXT: movl $-1, b ; X86-NEXT: retl ; ; X64-LABEL: PR33960: ; X64: # %bb.0: # %entry -; X64-NEXT: movl $-1, b(%rip) ; X64-NEXT: retq entry: %tmp = insertelement <4 x i32> , i32 -2, i32 3 diff --git a/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp 
b/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp index 1016790637b82..65363b32a9005 100644 --- a/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp @@ -1,4 +1,4 @@ -//===---- llvm/unittest/CodeGen/SelectionDAGPatternMatchTest.cpp ---------===// +//===---- llvm/unittest/CodeGen/SelectionDAGNodeConstructionTest.cpp ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -22,8 +22,7 @@ TEST_F(SelectionDAGNodeConstructionTest, ADD) { EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Op, Undef), Undef); EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Op), Undef); @@ -41,13 +40,11 @@ TEST_F(SelectionDAGNodeConstructionTest, AND) { EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Poison), Zero); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Op, Undef), Zero); EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Op), Zero); - // TODO: Should be undef. 
- EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, MUL) { @@ -61,13 +58,11 @@ TEST_F(SelectionDAGNodeConstructionTest, MUL) { EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Poison), Zero); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Op, Undef), Zero); EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Op), Zero); - // TODO: Should be undef. - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, OR) { @@ -81,13 +76,11 @@ TEST_F(SelectionDAGNodeConstructionTest, OR) { EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Poison), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Op, Undef), AllOnes); EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Op), AllOnes); - // TODO: Should be undef. 
- EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SADDSAT) { @@ -101,13 +94,11 @@ TEST_F(SelectionDAGNodeConstructionTest, SADDSAT) { EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Poison), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Op, Undef), AllOnes); EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Op), AllOnes); - // TODO: Should be undef. - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SDIV) { @@ -118,19 +109,50 @@ TEST_F(SelectionDAGNodeConstructionTest, SDIV) { SDValue Undef = DAG->getUNDEF(Int32VT); SDValue Zero = DAG->getConstant(0, DL, Int32VT); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Op, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Poison, Op), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Poison, Undef), Undef); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Poison), Poison); - // TODO: Should be poison. 
- EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Op, Undef), Poison); EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Op), Zero); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Undef), Poison); +} + +TEST_F(SelectionDAGNodeConstructionTest, SMAX) { + SDLoc DL; + auto Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue MaxInt = DAG->getConstant(APInt::getSignedMaxValue(32), DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Op, Undef), MaxInt); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Undef, Op), MaxInt); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Undef, Undef), Undef); +} + +TEST_F(SelectionDAGNodeConstructionTest, SMIN) { + SDLoc DL; + auto Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue MinInt = DAG->getConstant(APInt::getSignedMinValue(32), DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Op, Undef), MinInt); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, 
Int32VT, Undef, Op), MinInt); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SREM) { @@ -141,19 +163,14 @@ TEST_F(SelectionDAGNodeConstructionTest, SREM) { SDValue Undef = DAG->getUNDEF(Int32VT); SDValue Zero = DAG->getConstant(0, DL, Int32VT); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Op, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Poison, Op), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Poison, Undef), Undef); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Poison), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Op, Undef), Poison); EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Op), Zero); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Undef), Poison); } TEST_F(SelectionDAGNodeConstructionTest, SSUBSAT) { @@ -167,13 +184,11 @@ TEST_F(SelectionDAGNodeConstructionTest, SSUBSAT) { EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. 
- EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Poison), Zero); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Op, Undef), Zero); EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Op), Zero); - // TODO: Should be undef. - EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SUB) { @@ -186,8 +201,7 @@ TEST_F(SelectionDAGNodeConstructionTest, SUB) { EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Op, Undef), Undef); EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Op), Undef); @@ -205,13 +219,11 @@ TEST_F(SelectionDAGNodeConstructionTest, UADDSAT) { EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Poison), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Op, Undef), AllOnes); EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Op), AllOnes); - // TODO: Should be undef. 
- EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, UDIV) { @@ -222,19 +234,50 @@ TEST_F(SelectionDAGNodeConstructionTest, UDIV) { SDValue Undef = DAG->getUNDEF(Int32VT); SDValue Zero = DAG->getConstant(0, DL, Int32VT); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Op, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Poison, Op), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Poison, Undef), Undef); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Poison), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Op, Undef), Poison); EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Op), Zero); - // TODO: Should be poison. 
- EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Undef), Poison); +} + +TEST_F(SelectionDAGNodeConstructionTest, UMAX) { + SDLoc DL; + auto Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue AllOnes = DAG->getAllOnesConstant(DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Op, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Undef, Op), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Undef, Undef), Undef); +} + +TEST_F(SelectionDAGNodeConstructionTest, UMIN) { + SDLoc DL; + auto Int32VT = EVT::getIntegerVT(Context, 32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); + SDValue Poison = DAG->getPOISON(Int32VT); + SDValue Undef = DAG->getUNDEF(Int32VT); + SDValue Zero = DAG->getConstant(0, DL, Int32VT); + + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, UREM) { @@ -245,19 +288,14 @@ TEST_F(SelectionDAGNodeConstructionTest, UREM) { SDValue Undef = DAG->getUNDEF(Int32VT); SDValue Zero = 
DAG->getConstant(0, DL, Int32VT); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Op, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Poison, Op), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Poison, Undef), Undef); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Poison), Undef); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Poison), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Op, Undef), Poison); EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Op), Zero); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Undef), Poison); } TEST_F(SelectionDAGNodeConstructionTest, USUBSAT) { @@ -271,13 +309,11 @@ TEST_F(SelectionDAGNodeConstructionTest, USUBSAT) { EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Poison, Op), Poison); EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Poison, Undef), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Poison), Zero); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Op, Undef), Zero); EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Op), Zero); - // TODO: Should be undef. 
- EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, XOR) { @@ -290,10 +326,8 @@ TEST_F(SelectionDAGNodeConstructionTest, XOR) { EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Op, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Poison, Op), Poison); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Poison, Undef), Zero); - // TODO: Should be poison. - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Poison), Zero); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Poison), Poison); EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Op, Undef), Undef); EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Op), Undef); diff --git a/llvm/unittests/CodeGen/SelectionDAGTestBase.h b/llvm/unittests/CodeGen/SelectionDAGTestBase.h index 8daf8f398449c..edc730d7f9b45 100644 --- a/llvm/unittests/CodeGen/SelectionDAGTestBase.h +++ b/llvm/unittests/CodeGen/SelectionDAGTestBase.h @@ -1,4 +1,4 @@ -//===---- llvm/unittest/CodeGen/SelectionDAGPatternMatchTest.cpp ---------===// +//===---- llvm/unittest/CodeGen/SelectionDAGTestBase.h --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From d87747cf4ac647576461894af68330d2eaf4947e Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 29 Jul 2025 15:23:45 +0000 Subject: [PATCH 3/3] Use MVT::i32 instead of Int32VT indirection. 
--- .../SelectionDAGNodeConstructionTest.cpp | 482 +++++++++--------- 1 file changed, 232 insertions(+), 250 deletions(-) diff --git a/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp b/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp index 65363b32a9005..b2c1420215c3f 100644 --- a/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp @@ -14,322 +14,304 @@ class SelectionDAGNodeConstructionTest : public SelectionDAGTestBase {}; TEST_F(SelectionDAGNodeConstructionTest, ADD) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Op, Undef), Undef); - EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Op), Undef); - EXPECT_EQ(DAG->getNode(ISD::ADD, DL, Int32VT, Undef, Undef), Undef); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, MVT::i32, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, MVT::i32, Undef, Op), Undef); + EXPECT_EQ(DAG->getNode(ISD::ADD, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, 
AND) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Op, Undef), Zero); - EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::AND, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::AND, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::AND, DL, MVT::i32, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::AND, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, MUL) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, 
Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Op, Undef), Zero); - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::MUL, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, MVT::i32, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::MUL, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, OR) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue AllOnes = DAG->getAllOnesConstant(DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Op, Undef), AllOnes); - EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Op), AllOnes); - EXPECT_EQ(DAG->getNode(ISD::OR, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = 
DAG->getUNDEF(MVT::i32); + SDValue AllOnes = DAG->getAllOnesConstant(DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::OR, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::OR, DL, MVT::i32, Op, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, MVT::i32, Undef, Op), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::OR, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SADDSAT) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue AllOnes = DAG->getAllOnesConstant(DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Op, Undef), AllOnes); - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Op), AllOnes); - EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue AllOnes = DAG->getAllOnesConstant(DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, MVT::i32, Undef, Poison), Poison); + + 
EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, MVT::i32, Op, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, MVT::i32, Undef, Op), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::SADDSAT, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SDIV) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Op, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, Int32VT, Undef, Undef), Poison); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, MVT::i32, Op, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::SDIV, DL, MVT::i32, Undef, Undef), Poison); } TEST_F(SelectionDAGNodeConstructionTest, SMAX) { SDLoc DL; - auto Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = 
DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue MaxInt = DAG->getConstant(APInt::getSignedMaxValue(32), DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Op, Undef), MaxInt); - EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Undef, Op), MaxInt); - EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue MaxInt = DAG->getConstant(APInt::getSignedMaxValue(32), DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, MVT::i32, Op, Undef), MaxInt); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, MVT::i32, Undef, Op), MaxInt); + EXPECT_EQ(DAG->getNode(ISD::SMAX, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SMIN) { SDLoc DL; - auto Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue MinInt = DAG->getConstant(APInt::getSignedMinValue(32), DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Poison, Undef), Poison); - 
EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Op, Undef), MinInt); - EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Undef, Op), MinInt); - EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue MinInt = DAG->getConstant(APInt::getSignedMinValue(32), DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, MVT::i32, Op, Undef), MinInt); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, MVT::i32, Undef, Op), MinInt); + EXPECT_EQ(DAG->getNode(ISD::SMIN, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SREM) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Op, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::SREM, DL, Int32VT, Undef, Undef), Poison); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue 
Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, MVT::i32, Op, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::SREM, DL, MVT::i32, Undef, Undef), Poison); } TEST_F(SelectionDAGNodeConstructionTest, SSUBSAT) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Op, Undef), Zero); - EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, MVT::i32, Undef, Poison), Poison); + + 
EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, MVT::i32, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::SSUBSAT, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, SUB) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Op, Undef), Undef); - EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Op), Undef); - EXPECT_EQ(DAG->getNode(ISD::SUB, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, MVT::i32, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, MVT::i32, Undef, Op), Undef); + EXPECT_EQ(DAG->getNode(ISD::SUB, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, UADDSAT) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue AllOnes = DAG->getAllOnesConstant(DL, Int32VT); - - 
EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Op, Undef), AllOnes); - EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Op), AllOnes); - EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue AllOnes = DAG->getAllOnesConstant(DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, MVT::i32, Op, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, MVT::i32, Undef, Op), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UADDSAT, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, UDIV) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Op, Undef), Poison); - 
EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, Int32VT, Undef, Undef), Poison); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, MVT::i32, Op, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::UDIV, DL, MVT::i32, Undef, Undef), Poison); } TEST_F(SelectionDAGNodeConstructionTest, UMAX) { SDLoc DL; - auto Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue AllOnes = DAG->getAllOnesConstant(DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Op, Undef), AllOnes); - EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Undef, Op), AllOnes); - EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue AllOnes = DAG->getAllOnesConstant(DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, MVT::i32, Op, Poison), Poison); + 
EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, MVT::i32, Op, Undef), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, MVT::i32, Undef, Op), AllOnes); + EXPECT_EQ(DAG->getNode(ISD::UMAX, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, UMIN) { SDLoc DL; - auto Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Op, Undef), Zero); - EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, MVT::i32, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::UMIN, DL, MVT::i32, Undef, Undef), Undef); } 
TEST_F(SelectionDAGNodeConstructionTest, UREM) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Op, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::UREM, DL, Int32VT, Undef, Undef), Poison); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, MVT::i32, Op, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::UREM, DL, MVT::i32, Undef, Undef), Poison); } TEST_F(SelectionDAGNodeConstructionTest, USUBSAT) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, 
Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Op, Undef), Zero); - EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Op), Zero); - EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, Int32VT, Undef, Undef), Undef); + SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, MVT::i32, Op, Undef), Zero); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, MVT::i32, Undef, Op), Zero); + EXPECT_EQ(DAG->getNode(ISD::USUBSAT, DL, MVT::i32, Undef, Undef), Undef); } TEST_F(SelectionDAGNodeConstructionTest, XOR) { SDLoc DL; - EVT Int32VT = EVT::getIntegerVT(Context, 32); - SDValue Op = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); - SDValue Poison = DAG->getPOISON(Int32VT); - SDValue Undef = DAG->getUNDEF(Int32VT); - SDValue Zero = DAG->getConstant(0, DL, Int32VT); - - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Op, Poison), Poison); - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Poison, Op), Poison); - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Poison, Undef), Poison); - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Poison), Poison); - - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Op, Undef), Undef); - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Op), Undef); - EXPECT_EQ(DAG->getNode(ISD::XOR, DL, Int32VT, Undef, Undef), Zero); + SDValue Op = 
DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32); + SDValue Poison = DAG->getPOISON(MVT::i32); + SDValue Undef = DAG->getUNDEF(MVT::i32); + SDValue Zero = DAG->getConstant(0, DL, MVT::i32); + + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, MVT::i32, Op, Poison), Poison); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, MVT::i32, Poison, Op), Poison); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, MVT::i32, Poison, Undef), Poison); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, MVT::i32, Undef, Poison), Poison); + + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, MVT::i32, Op, Undef), Undef); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, MVT::i32, Undef, Op), Undef); + EXPECT_EQ(DAG->getNode(ISD::XOR, DL, MVT::i32, Undef, Undef), Zero); }